diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 06e893fabb3..5088bf90ede 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -13,3 +13,6 @@ # dbms/ → src/ # (though it is unlikely that you will see it in blame) 06446b4f08a142d6f1bc30664c47ded88ab51782 + +# Applied Black formatter for Python code +e6f5a3f98b21ba99cf274a9833797889e020a2b3 diff --git a/.gitattributes b/.gitattributes index 56d6fecf4b8..dd94a48f8e7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,3 +2,4 @@ contrib/* linguist-vendored *.h linguist-language=C++ tests/queries/0_stateless/data_json/* binary tests/queries/0_stateless/*.reference -crlf +src/Core/SettingsChangesHistory.cpp merge=union diff --git a/.github/actionlint.yml b/.github/actionlint.yml index 0f88f30d42c..4357bd3eb6b 100644 --- a/.github/actionlint.yml +++ b/.github/actionlint.yml @@ -7,3 +7,4 @@ self-hosted-runner: - stress-tester - style-checker - style-checker-aarch64 + - release-maker diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 3988df3b2b1..972aff90195 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -6,8 +6,8 @@ concurrency: 'on': workflow_dispatch: inputs: - sha: - description: 'The SHA hash of the commit from which to create the release' + ref: + description: 'Git reference (branch or commit sha) from which to create the release' required: true type: string type: @@ -15,15 +15,152 @@ concurrency: required: true type: choice options: - - new - patch + - new + dry-run: + description: 'Dry run' + required: false + default: true + type: boolean jobs: - Release: - runs-on: [self-hosted, style-checker-aarch64] + CreateRelease: + env: + GH_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} + runs-on: [self-hosted, release-maker] steps: + - name: DebugInfo + uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6 + - name: Set envs + # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings + run: | + cat >> "$GITHUB_ENV" << 'EOF' + ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" + echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV" + - name: Download All Release Artifacts + if: ${{ inputs.type == 'patch' }} + run: | + python3 ./tests/ci/create_release.py --infile "$RELEASE_INFO_FILE" --download-packages ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Push Git Tag for the Release + run: | + python3 ./tests/ci/create_release.py --push-release-tag --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Push New Release Branch + if: ${{ inputs.type == 'new' }} + run: | + python3 ./tests/ci/create_release.py --push-new-release-branch --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Bump CH Version and Update Contributors' List + run: | + python3 ./tests/ci/create_release.py --create-bump-version-pr --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Checkout master + run: | + git checkout master + - name: Bump Docker versions, Changelog, Security + if: ${{ inputs.type == 'patch' }} + run: | + [ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1 + echo "List versions" + ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv + echo "Update docker version" + ./utils/list-versions/update-docker-version.sh + echo "Generate ChangeLog" + export CI=1 + docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \ + --volume=".:/ClickHouse" clickhouse/style-test \ + /ClickHouse/tests/ci/changelog.py -v --debug-helpers \ + --gh-user-or-token="$GH_TOKEN" --jobs=5 \ + --output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }} + git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md + echo "Generate Security" + python3 ./utils/security-generator/generate_security.py > SECURITY.md + git diff HEAD + - name: Generate ChangeLog + if: ${{ inputs.type == 'patch' && ! inputs.dry-run }} + uses: peter-evans/create-pull-request@v6 + with: + author: "robot-clickhouse " + token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} + committer: "robot-clickhouse " + commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} + branch: auto/${{ env.RELEASE_TAG }} + assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher + delete-branch: true + title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }} + labels: do not test + body: | + Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} + ### Changelog category (leave one): + - Not for changelog (changelog entry is not required) + - name: Reset changes if Dry-run + if: ${{ inputs.dry-run }} + run: | + git reset --hard HEAD + - name: Checkout back to GITHUB_REF + run: | + git checkout "$GITHUB_REF_NAME" + - name: Create GH Release + if: ${{ inputs.type == 'patch' }} + run: | + python3 ./tests/ci/create_release.py --create-gh-release \ + --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + + - name: Export TGZ Packages + if: ${{ inputs.type == 'patch' }} + run: | + python3 ./tests/ci/artifactory.py --export-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Test TGZ Packages + if: ${{ inputs.type == 'patch' }} + run: | + python3 ./tests/ci/artifactory.py --test-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Export RPM Packages + if: ${{ inputs.type == 'patch' }} + run: | + python3 ./tests/ci/artifactory.py --export-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Test RPM Packages + if: ${{ inputs.type == 'patch' }} + run: | + python3 ./tests/ci/artifactory.py --test-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Export Debian Packages + if: ${{ inputs.type == 'patch' }} + run: | + python3 ./tests/ci/artifactory.py --export-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Test Debian Packages + if: ${{ inputs.type == 'patch' }} + run: | + python3 ./tests/ci/artifactory.py --test-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Docker clickhouse/clickhouse-server building + if: ${{ inputs.type == 'patch' }} + run: | + cd "./tests/ci" + export CHECK_NAME="Docker server image" + python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} + - name: Docker clickhouse/clickhouse-keeper building + if: ${{ inputs.type == 'patch' }} + run: | + cd "./tests/ci" + export CHECK_NAME="Docker keeper image" + python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} + - name: Post Slack Message + if: always() + run: | + echo Slack Message diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 4764e6d3c1a..c9f4f858825 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -172,7 +172,7 @@ jobs: ################################# Stage Final ################################# # FinishCheck: - if: ${{ !cancelled() }} + if: ${{ !failure() && !cancelled() }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker-aarch64] steps: diff --git a/.yamllint b/.yamllint index 9d6550ac960..f144e2d47b1 100644 --- a/.yamllint +++ b/.yamllint @@ -13,5 +13,4 @@ rules: level: warning comments: min-spaces-from-content: 1 - document-start: - present: false + document-start: disable diff --git a/README.md b/README.md index dc253d4db2d..98f9108f14c 100644 --- a/README.md +++ b/README.md @@ -34,14 +34,12 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. -* [v24.6 Community Call](https://clickhouse.com/company/events/v24-6-community-release-call) - Jul 2 +* [v24.7 Community Call](https://clickhouse.com/company/events/v24-7-community-release-call) - Jul 30 ## Upcoming Events Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. -* [AWS Summit in DC](https://clickhouse.com/company/events/2024-06-aws-summit-dc) - Jun 26 -* [ClickHouse Meetup in Amsterdam](https://www.meetup.com/clickhouse-netherlands-user-group/events/300781068/) - Jun 27 * [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9 * [ClickHouse Cloud - Live Update Call](https://clickhouse.com/company/events/202407-cloud-update-live) - Jul 9 * [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9 diff --git a/base/base/demangle.h b/base/base/demangle.h index ddca264ecab..af9ccad16c1 100644 --- a/base/base/demangle.h +++ b/base/base/demangle.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include diff --git a/base/base/extended_types.h b/base/base/extended_types.h index 796167ab45d..3bf3f4ed31d 100644 --- a/base/base/extended_types.h +++ b/base/base/extended_types.h @@ -108,6 +108,14 @@ struct make_unsigned // NOLINT(readability-identifier-naming) using type = std::make_unsigned_t; }; +template <> struct make_unsigned { using type = UInt8; }; +template <> struct make_unsigned { using type = UInt8; }; +template <> struct make_unsigned { using type = UInt16; }; +template <> struct make_unsigned { using type = UInt16; }; +template <> struct make_unsigned { using type = UInt32; }; +template <> struct make_unsigned { using type = UInt32; }; +template <> struct make_unsigned { using type = UInt64; }; +template <> struct make_unsigned { using type = UInt64; }; template <> struct make_unsigned { using type = UInt128; }; template <> struct make_unsigned { using type = UInt128; }; template <> struct make_unsigned { using type = UInt256; }; @@ -121,6 +129,14 @@ struct make_signed // NOLINT(readability-identifier-naming) using type = std::make_signed_t; }; +template <> struct make_signed { using type = Int8; }; +template <> struct make_signed { using type = Int8; }; +template <> struct make_signed { using type = Int16; }; +template <> struct make_signed { using type = Int16; }; +template <> struct make_signed { using type = Int32; }; +template <> struct make_signed { using type = Int32; }; +template <> struct make_signed { using type = Int64; }; +template <> struct make_signed { using type = Int64; }; template <> struct make_signed { using type = Int128; }; template <> struct make_signed { using type = Int128; }; template <> struct make_signed { using type = Int256; }; diff --git a/base/base/isSharedPtrUnique.h b/base/base/isSharedPtrUnique.h new file mode 100644 index 00000000000..c153605ecb1 --- /dev/null +++ b/base/base/isSharedPtrUnique.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +template +bool isSharedPtrUnique(const std::shared_ptr & ptr) +{ + return ptr.use_count() == 1; +} diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index 9a2d02e3388..60231507c96 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -1,32 +1,3 @@ -// Based on https://github.com/amdn/itoa and combined with our optimizations -// -//=== itoa.cpp - Fast integer to ascii conversion --*- C++ -*-// -// -// The MIT License (MIT) -// Copyright (c) 2016 Arturo Martin-de-Nicolas -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. -//===----------------------------------------------------------------------===// - -#include -#include -#include #include #include #include @@ -34,99 +5,15 @@ namespace { -template -ALWAYS_INLINE inline constexpr T pow10(size_t x) -{ - return x ? 10 * pow10(x - 1) : 1; -} - -// Division by a power of 10 is implemented using a multiplicative inverse. -// This strength reduction is also done by optimizing compilers, but -// presently the fastest results are produced by using the values -// for the multiplication and the shift as given by the algorithm -// described by Agner Fog in "Optimizing Subroutines in Assembly Language" -// -// http://www.agner.org/optimize/optimizing_assembly.pdf -// -// "Integer division by a constant (all processors) -// A floating point number can be divided by a constant by multiplying -// with the reciprocal. If we want to do the same with integers, we have -// to scale the reciprocal by 2n and then shift the product to the right -// by n. There are various algorithms for finding a suitable value of n -// and compensating for rounding errors. The algorithm described below -// was invented by Terje Mathisen, Norway, and not published elsewhere." - -/// Division by constant is performed by: -/// 1. Adding 1 if needed; -/// 2. Multiplying by another constant; -/// 3. Shifting right by another constant. -template -struct Division -{ - static constexpr bool add{add_}; - static constexpr UInt multiplier{multiplier_}; - static constexpr unsigned shift{shift_}; -}; - -/// Select a type with appropriate number of bytes from the list of types. -/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes. -/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t. -template -struct SelectType -{ - using Result = typename SelectType::Result; -}; - -template -struct SelectType<1, T, Ts...> -{ - using Result = T; -}; - - -/// Division by 10^N where N is the size of the type. -template -using DivisionBy10PowN = typename SelectType< - N, - Division, /// divide by 10 - Division, /// divide by 100 - Division, /// divide by 10000 - Division /// divide by 100000000 - >::Result; - -template -using UnsignedOfSize = typename SelectType::Result; - -/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in -template -struct QuotientAndRemainder -{ - UnsignedOfSize quotient; // quotient with fewer than 2*N decimal digits - UnsignedOfSize remainder; // remainder with at most N decimal digits -}; - -template -QuotientAndRemainder inline split(UnsignedOfSize value) -{ - constexpr DivisionBy10PowN division; - - UnsignedOfSize quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift; - UnsignedOfSize remainder = static_cast>(value - quotient * pow10>(N)); - - return {quotient, remainder}; -} - -ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value) +ALWAYS_INLINE inline char * outOneDigit(char * p, uint8_t value) { *p = '0' + value; - ++p; - return p; + return p + 1; } // Using a lookup table to convert binary numbers from 0 to 99 // into ascii characters as described by Andrei Alexandrescu in // https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ - const char digits[201] = "00010203040506070809" "10111213141516171819" "20212223242526272829" @@ -137,7 +24,6 @@ const char digits[201] = "00010203040506070809" "70717273747576777879" "80818283848586878889" "90919293949596979899"; - ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) { memcpy(p, &digits[value * 2], 2); @@ -145,153 +31,260 @@ ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) return p; } -namespace convert +namespace jeaiii { -template -char * head(char * p, UInt u); -template -char * tail(char * p, UInt u); +/* + MIT License -//===----------------------------------------------------------===// -// head: find most significant digit, skip leading zeros -//===----------------------------------------------------------===// + Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa -// "x" contains quotient and remainder after division by 10^N -// quotient is less than 10^N -template -ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder x) + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ +struct pair { - p = head(p, UnsignedOfSize(x.quotient)); - p = tail(p, x.remainder); - return p; -} + char dd[2]; + constexpr pair(char c) : dd{c, '\0'} { } /// NOLINT(google-explicit-constructor) + constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } /// NOLINT(google-explicit-constructor) +}; -// "u" is less than 10^2*N -template -ALWAYS_INLINE inline char * head(char * p, UInt u) +constexpr struct { - return u < pow10>(N) ? head(p, UnsignedOfSize(u)) : head(p, split(u)); -} + pair dd[100]{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, // + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, // + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, // + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, // + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, // + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, // + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, // + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, // + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, // + }; + pair fd[100]{ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', // + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, // + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, // + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, // + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, // + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, // + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, // + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, // + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, // + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, // + }; +} digits; -// recursion base case, selected when "u" is one byte -template <> -ALWAYS_INLINE inline char * head, 1>(char * p, UnsignedOfSize<1> u) +constexpr UInt64 mask24 = (UInt64(1) << 24) - 1; +constexpr UInt64 mask32 = (UInt64(1) << 32) - 1; +constexpr UInt64 mask57 = (UInt64(1) << 57) - 1; + +template +struct _cond { - return u < 10 ? outDigit(p, u) : outTwoDigits(p, u); -} - -//===----------------------------------------------------------===// -// tail: produce all digits including leading zeros -//===----------------------------------------------------------===// - -// recursive step, "u" is less than 10^2*N -template -ALWAYS_INLINE inline char * tail(char * p, UInt u) + using type = F; +}; +template +struct _cond { - QuotientAndRemainder x = split(u); - p = tail(p, UnsignedOfSize(x.quotient)); - p = tail(p, x.remainder); - return p; -} + using type = T; +}; +template +using cond = typename _cond::type; -// recursion base case, selected when "u" is one byte -template <> -ALWAYS_INLINE inline char * tail, 1>(char * p, UnsignedOfSize<1> u) +template +inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) { - return outTwoDigits(p, u); -} + constexpr auto q = sizeof(T); + using U = cond>>; -//===----------------------------------------------------------===// -// large values are >= 10^2*N -// where x contains quotient and remainder after division by 10^N -//===----------------------------------------------------------===// -template -ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder x) -{ - QuotientAndRemainder y = split(x.quotient); - p = head(p, UnsignedOfSize(y.quotient)); - p = tail(p, y.remainder); - p = tail(p, x.remainder); - return p; -} + // convert bool to int before test with unary + to silence warning if T happens to be bool + U const n = +i < 0 ? *b++ = '-', U(0) - U(i) : U(i); -//===----------------------------------------------------------===// -// handle values of "u" that might be >= 10^2*N -// where N is the size of "u" in bytes -//===----------------------------------------------------------===// -template -ALWAYS_INLINE inline char * uitoa(char * p, UInt u) -{ - if (u < pow10>(N)) - return head(p, UnsignedOfSize(u)); - QuotientAndRemainder x = split(u); + if (n < U(1e2)) + { + /// This is changed from the original jeaiii implementation + /// For small numbers the extra branch to call outOneDigit() is worth it as it saves some instructions + /// and a memory access (no need to read digits.fd[n]) + /// This is not true for pure random numbers, but that's not the common use case of a database + /// Original jeaii code + // *reinterpret_cast(b) = digits.fd[n]; + // return n < 10 ? b + 1 : b + 2; + return n < 10 ? outOneDigit(b, n) : outTwoDigits(b, n); + } + if (n < UInt32(1e6)) + { + if (sizeof(U) == 1 || n < U(1e4)) + { + auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + if constexpr (sizeof(U) == 1) + b -= 1; + else + b -= n < U(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + return b + 4; + } + auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + if constexpr (sizeof(U) == 2) + b -= 1; + else + b -= n < U(1e5); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + return b + 6; + } + if (sizeof(U) == 4 || n < UInt64(1ull << 32ull)) + { + if (n < U(1e8)) + { + auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= n < U(1e7); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + return b + 8; + } + auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 57]; + b -= n < UInt32(1e9); + auto f2 = (f0 & mask57) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; + auto f4 = (f2 & mask57) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; + auto f6 = (f4 & mask57) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; + auto f8 = (f6 & mask57) * 100; + *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; + return b + 10; + } - return u < pow10>(2 * N) ? head(p, x) : large(p, x); -} + // if we get here U must be UInt64 but some compilers don't know that, so reassign n to a UInt64 to avoid warnings + UInt32 z = n % UInt32(1e8); + UInt64 u = n / UInt32(1e8); -// selected when "u" is one byte -template <> -ALWAYS_INLINE inline char * uitoa, 1>(char * p, UnsignedOfSize<1> u) -{ - if (u < 10) - return outDigit(p, u); - else if (u < 100) - return outTwoDigits(p, u); + if (u < UInt32(1e2)) + { + // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number) + *reinterpret_cast(b) = digits.dd[u]; + b += 2; + } + else if (u < UInt32(1e6)) + { + if (u < UInt32(1e4)) + { + auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= u < UInt32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + b += 4; + } + else + { + auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= u < UInt32(1e5); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + b += 6; + } + } + else if (u < UInt32(1e8)) + { + auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= u < UInt32(1e7); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + b += 8; + } + else if (u < UInt64(1ull << 32ull)) + { + auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 57]; + b -= u < UInt32(1e9); + auto f2 = (f0 & mask57) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; + auto f4 = (f2 & mask57) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; + auto f6 = (f4 & mask57) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; + auto f8 = (f6 & mask57) * 100; + *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; + b += 10; + } else { - p = outDigit(p, u / 100); - p = outTwoDigits(p, u % 100); - return p; + UInt32 y = u % UInt32(1e8); + u /= UInt32(1e8); + + // u is 2, 3, or 4 digits (if u < 10 it would have been handled above) + if (u < UInt32(1e2)) + { + *reinterpret_cast(b) = digits.dd[u]; + b += 2; + } + else + { + auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= u < UInt32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + b += 4; + } + // do 8 digits + auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1; + *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + b += 8; } -} - -//===----------------------------------------------------------===// -// handle unsigned and signed integral operands -//===----------------------------------------------------------===// - -// itoa: handle unsigned integral operands (selected by SFINAE) -template -requires(!std::is_signed_v && std::is_integral_v) -ALWAYS_INLINE inline char * itoa(U u, char * p) -{ - return convert::uitoa(p, u); -} - -// itoa: handle signed integral operands (selected by SFINAE) -template -requires(std::is_signed_v && std::is_integral_v) -ALWAYS_INLINE inline char * itoa(I i, char * p) -{ - // Need "mask" to be filled with a copy of the sign bit. - // If "i" is a negative value, then the result of "operator >>" - // is implementation-defined, though usually it is an arithmetic - // right shift that replicates the sign bit. - // Use a conditional expression to be portable, - // a good optimizing compiler generates an arithmetic right shift - // and avoids the conditional branch. - UnsignedOfSize mask = i < 0 ? ~UnsignedOfSize(0) : 0; - // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize. - // Cannot use std::abs() because the result is undefined - // in 2's complement systems for the most-negative value. - // Want to avoid conditional branch for performance reasons since - // CPU branch prediction will be ineffective when negative values - // occur randomly. - // Let "u" be "i" cast to unsigned type UnsignedOfSize. - // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative. - // This yields the absolute value with the desired type without - // using a conditional branch and without invoking undefined or - // implementation defined behavior: - UnsignedOfSize u = ((2 * UnsignedOfSize(i)) & ~mask) - UnsignedOfSize(i); - // Unconditionally store a minus sign when producing digits - // in a forward direction and increment the pointer only if - // the value is in fact negative. - // This avoids a conditional branch and is safe because we will - // always produce at least one digit and it will overwrite the - // minus sign when the value is not negative. - *p = '-'; - p += (mask & 1); - p = convert::uitoa(p, u); - return p; + // do 8 digits + auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1; + *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + return b + 8; } } @@ -303,7 +296,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p) { /// If we the highest 64bit item is empty, we can print just the lowest item as u64 if (_x.items[UInt128::_impl::little(1)] == 0) - return convert::itoa(_x.items[UInt128::_impl::little(0)], p); + return jeaiii::to_text_from_integer(p, _x.items[UInt128::_impl::little(0)]); /// Doing operations using __int128 is faster and we already rely on this feature using T = unsigned __int128; @@ -334,7 +327,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p) current_block += max_multiple_of_hundred_blocks; } - char * highest_part_print = convert::itoa(uint64_t(x), p); + char * highest_part_print = jeaiii::to_text_from_integer(p, uint64_t(x)); for (int i = 0; i < current_block; i++) { outTwoDigits(highest_part_print, two_values[current_block - 1 - i]); @@ -450,12 +443,12 @@ ALWAYS_INLINE inline char * writeSIntText(T x, char * pos) char * itoa(UInt8 i, char * p) { - return convert::itoa(uint8_t(i), p); + return jeaiii::to_text_from_integer(p, uint8_t(i)); } char * itoa(Int8 i, char * p) { - return convert::itoa(int8_t(i), p); + return jeaiii::to_text_from_integer(p, int8_t(i)); } char * itoa(UInt128 i, char * p) @@ -481,7 +474,7 @@ char * itoa(Int256 i, char * p) #define DEFAULT_ITOA(T) \ char * itoa(T i, char * p) \ { \ - return convert::itoa(i, p); \ + return jeaiii::to_text_from_integer(p, i); \ } #define FOR_MISSING_INTEGER_TYPES(M) \ diff --git a/base/poco/Foundation/include/Poco/Format.h b/base/poco/Foundation/include/Poco/Format.h index f84be16d3ad..4f91dd44ca5 100644 --- a/base/poco/Foundation/include/Poco/Format.h +++ b/base/poco/Foundation/include/Poco/Format.h @@ -232,7 +232,7 @@ void Foundation_API format( const Any & value10); -void Foundation_API format(std::string & result, const std::string & fmt, const std::vector & values); +void Foundation_API formatVector(std::string & result, const std::string & fmt, const std::vector & values); /// Supports a variable number of arguments and is used by /// all other variants of format(). diff --git a/base/poco/Foundation/include/Poco/Logger.h b/base/poco/Foundation/include/Poco/Logger.h index 2a1cb33b407..74ddceea9dd 100644 --- a/base/poco/Foundation/include/Poco/Logger.h +++ b/base/poco/Foundation/include/Poco/Logger.h @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include "Poco/Channel.h" diff --git a/base/poco/Foundation/include/Poco/Message.h b/base/poco/Foundation/include/Poco/Message.h index 9068e56a93c..756e427c5f5 100644 --- a/base/poco/Foundation/include/Poco/Message.h +++ b/base/poco/Foundation/include/Poco/Message.h @@ -19,6 +19,7 @@ #include +#include #include "Poco/Foundation.h" #include "Poco/Timestamp.h" diff --git a/base/poco/Foundation/include/Poco/RefCountedObject.h b/base/poco/Foundation/include/Poco/RefCountedObject.h index db966089e00..d0d964d8390 100644 --- a/base/poco/Foundation/include/Poco/RefCountedObject.h +++ b/base/poco/Foundation/include/Poco/RefCountedObject.h @@ -21,6 +21,8 @@ #include "Poco/AtomicCounter.h" #include "Poco/Foundation.h" +#include + namespace Poco { diff --git a/base/poco/Foundation/src/Format.cpp b/base/poco/Foundation/src/Format.cpp index 9872ddff042..94ab124510d 100644 --- a/base/poco/Foundation/src/Format.cpp +++ b/base/poco/Foundation/src/Format.cpp @@ -51,8 +51,8 @@ namespace } if (width != 0) str.width(width); } - - + + void parsePrec(std::ostream& str, std::string::const_iterator& itFmt, const std::string::const_iterator& endFmt) { if (itFmt != endFmt && *itFmt == '.') @@ -67,7 +67,7 @@ namespace if (prec >= 0) str.precision(prec); } } - + char parseMod(std::string::const_iterator& itFmt, const std::string::const_iterator& endFmt) { char mod = 0; @@ -77,13 +77,13 @@ namespace { case 'l': case 'h': - case 'L': + case 'L': case '?': mod = *itFmt++; break; } } return mod; } - + std::size_t parseIndex(std::string::const_iterator& itFmt, const std::string::const_iterator& endFmt) { int index = 0; @@ -110,8 +110,8 @@ namespace case 'f': str << std::fixed; break; } } - - + + void writeAnyInt(std::ostream& str, const Any& any) { if (any.type() == typeid(char)) @@ -201,7 +201,7 @@ namespace str << RefAnyCast(*itVal++); break; case 'z': - str << AnyCast(*itVal++); + str << AnyCast(*itVal++); break; case 'I': case 'D': @@ -303,7 +303,7 @@ void format(std::string& result, const std::string& fmt, const Any& value) { std::vector args; args.push_back(value); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -312,7 +312,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons std::vector args; args.push_back(value1); args.push_back(value2); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -322,7 +322,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value1); args.push_back(value2); args.push_back(value3); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -333,7 +333,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value2); args.push_back(value3); args.push_back(value4); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -345,7 +345,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value3); args.push_back(value4); args.push_back(value5); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -358,7 +358,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value4); args.push_back(value5); args.push_back(value6); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -372,7 +372,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value5); args.push_back(value6); args.push_back(value7); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -387,7 +387,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value6); args.push_back(value7); args.push_back(value8); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -403,7 +403,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value7); args.push_back(value8); args.push_back(value9); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -420,16 +420,16 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value8); args.push_back(value9); args.push_back(value10); - format(result, fmt, args); + formatVector(result, fmt, args); } -void format(std::string& result, const std::string& fmt, const std::vector& values) +void formatVector(std::string& result, const std::string& fmt, const std::vector& values) { std::string::const_iterator itFmt = fmt.begin(); std::string::const_iterator endFmt = fmt.end(); std::vector::const_iterator itVal = values.begin(); - std::vector::const_iterator endVal = values.end(); + std::vector::const_iterator endVal = values.end(); while (itFmt != endFmt) { switch (*itFmt) diff --git a/base/poco/MongoDB/src/ObjectId.cpp b/base/poco/MongoDB/src/ObjectId.cpp index 0125c246c2d..e360d129843 100644 --- a/base/poco/MongoDB/src/ObjectId.cpp +++ b/base/poco/MongoDB/src/ObjectId.cpp @@ -57,7 +57,7 @@ std::string ObjectId::toString(const std::string& fmt) const for (int i = 0; i < 12; ++i) { - s += format(fmt, (unsigned int) _id[i]); + s += Poco::format(fmt, (unsigned int) _id[i]); } return s; } diff --git a/base/poco/MongoDB/src/OpMsgCursor.cpp b/base/poco/MongoDB/src/OpMsgCursor.cpp index bc95851ae33..6abd45ecf76 100644 --- a/base/poco/MongoDB/src/OpMsgCursor.cpp +++ b/base/poco/MongoDB/src/OpMsgCursor.cpp @@ -43,9 +43,9 @@ namespace Poco { namespace MongoDB { -static const std::string keyCursor {"cursor"}; -static const std::string keyFirstBatch {"firstBatch"}; -static const std::string keyNextBatch {"nextBatch"}; +[[ maybe_unused ]] static const std::string keyCursor {"cursor"}; +[[ maybe_unused ]] static const std::string keyFirstBatch {"firstBatch"}; +[[ maybe_unused ]] static const std::string keyNextBatch {"nextBatch"}; static Poco::Int64 cursorIdFromResponse(const MongoDB::Document& doc); @@ -131,7 +131,7 @@ OpMsgMessage& OpMsgCursor::next(Connection& connection) connection.readResponse(_response); } else -#endif +#endif { _response.clear(); _query.setCursor(_cursorID, _batchSize); diff --git a/base/poco/Net/include/Poco/Net/NameValueCollection.h b/base/poco/Net/include/Poco/Net/NameValueCollection.h index be499838d0e..2337535bd11 100644 --- a/base/poco/Net/include/Poco/Net/NameValueCollection.h +++ b/base/poco/Net/include/Poco/Net/NameValueCollection.h @@ -79,7 +79,7 @@ namespace Net /// Returns the value of the first name-value pair with the given name. /// If no value with the given name has been found, the defaultValue is returned. - const std::vector> getAll(const std::string & name) const; + std::vector getAll(const std::string & name) const; /// Returns all values of all name-value pairs with the given name. /// /// Returns an empty vector if there are no name-value pairs with the given name. diff --git a/base/poco/Net/src/HTTPMessage.cpp b/base/poco/Net/src/HTTPMessage.cpp index c0083ec410c..b7ab5543a85 100644 --- a/base/poco/Net/src/HTTPMessage.cpp +++ b/base/poco/Net/src/HTTPMessage.cpp @@ -17,9 +17,9 @@ #include "Poco/NumberFormatter.h" #include "Poco/NumberParser.h" #include "Poco/String.h" +#include #include - using Poco::NumberFormatter; using Poco::NumberParser; using Poco::icompare; @@ -75,7 +75,7 @@ void HTTPMessage::setContentLength(std::streamsize length) erase(CONTENT_LENGTH); } - + std::streamsize HTTPMessage::getContentLength() const { const std::string& contentLength = get(CONTENT_LENGTH, EMPTY); @@ -98,7 +98,7 @@ void HTTPMessage::setContentLength64(Poco::Int64 length) erase(CONTENT_LENGTH); } - + Poco::Int64 HTTPMessage::getContentLength64() const { const std::string& contentLength = get(CONTENT_LENGTH, EMPTY); @@ -133,13 +133,13 @@ void HTTPMessage::setChunkedTransferEncoding(bool flag) setTransferEncoding(IDENTITY_TRANSFER_ENCODING); } - + bool HTTPMessage::getChunkedTransferEncoding() const { return icompare(getTransferEncoding(), CHUNKED_TRANSFER_ENCODING) == 0; } - + void HTTPMessage::setContentType(const std::string& mediaType) { if (mediaType.empty()) @@ -154,7 +154,7 @@ void HTTPMessage::setContentType(const MediaType& mediaType) setContentType(mediaType.toString()); } - + const std::string& HTTPMessage::getContentType() const { return get(CONTENT_TYPE, UNKNOWN_CONTENT_TYPE); diff --git a/base/poco/Net/src/NameValueCollection.cpp b/base/poco/Net/src/NameValueCollection.cpp index 783ed48cc30..e35d66d3bde 100644 --- a/base/poco/Net/src/NameValueCollection.cpp +++ b/base/poco/Net/src/NameValueCollection.cpp @@ -102,9 +102,9 @@ const std::string& NameValueCollection::get(const std::string& name, const std:: return defaultValue; } -const std::vector> NameValueCollection::getAll(const std::string& name) const +std::vector NameValueCollection::getAll(const std::string& name) const { - std::vector> values; + std::vector values; for (ConstIterator it = _map.find(name); it != _map.end(); it++) if (it->first == name) values.push_back(it->second); diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h index 49c12b6b45f..890752c52da 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h +++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h @@ -235,8 +235,6 @@ namespace Net /// Note that simply closing a socket is not sufficient /// to be able to re-use it again. - Poco::Timespan getMaxTimeout(); - private: SecureSocketImpl(const SecureSocketImpl &); SecureSocketImpl & operator=(const SecureSocketImpl &); @@ -250,6 +248,9 @@ namespace Net Session::Ptr _pSession; friend class SecureStreamSocketImpl; + + Poco::Timespan getMaxTimeoutOrLimit(); + //// Return max(send, receive) if non zero, otherwise maximum timeout }; diff --git a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp index efe25f65909..4873d259ae5 100644 --- a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp +++ b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp @@ -199,7 +199,7 @@ void SecureSocketImpl::connectSSL(bool performHandshake) if (performHandshake && _pSocket->getBlocking()) { int ret; - Poco::Timespan remaining_time = getMaxTimeout(); + Poco::Timespan remaining_time = getMaxTimeoutOrLimit(); do { RemainingTimeCounter counter(remaining_time); @@ -302,7 +302,7 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags) return rc; } - Poco::Timespan remaining_time = getMaxTimeout(); + Poco::Timespan remaining_time = getMaxTimeoutOrLimit(); do { RemainingTimeCounter counter(remaining_time); @@ -338,7 +338,7 @@ int SecureSocketImpl::receiveBytes(void* buffer, int length, int flags) return rc; } - Poco::Timespan remaining_time = getMaxTimeout(); + Poco::Timespan remaining_time = getMaxTimeoutOrLimit(); do { /// SSL record may consist of several TCP packets, @@ -372,7 +372,7 @@ int SecureSocketImpl::completeHandshake() poco_check_ptr (_pSSL); int rc; - Poco::Timespan remaining_time = getMaxTimeout(); + Poco::Timespan remaining_time = getMaxTimeoutOrLimit(); do { RemainingTimeCounter counter(remaining_time); @@ -453,18 +453,29 @@ X509* SecureSocketImpl::peerCertificate() const return 0; } -Poco::Timespan SecureSocketImpl::getMaxTimeout() +Poco::Timespan SecureSocketImpl::getMaxTimeoutOrLimit() { std::lock_guard lock(_mutex); Poco::Timespan remaining_time = _pSocket->getReceiveTimeout(); Poco::Timespan send_timeout = _pSocket->getSendTimeout(); if (remaining_time < send_timeout) remaining_time = send_timeout; + /// zero SO_SNDTIMEO/SO_RCVTIMEO works as no timeout, let's replicate this + /// + /// NOTE: we cannot use INT64_MAX (std::numeric_limits::max()), + /// since it will be later passed to poll() which accept int timeout, and + /// even though poll() accepts milliseconds and Timespan() accepts + /// microseconds, let's use smaller maximum value just to avoid some possible + /// issues, this should be enough anyway (it is ~24 days). + if (remaining_time == 0) + remaining_time = Poco::Timespan(std::numeric_limits::max()); return remaining_time; } bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time) { + if (remaining_time == 0) + return false; std::lock_guard lock(_mutex); if (rc <= 0) { @@ -475,9 +486,7 @@ bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time) case SSL_ERROR_WANT_READ: if (_pSocket->getBlocking()) { - /// Level-triggered mode of epoll_wait is used, so if SSL_read don't read all available data from socket, - /// epoll_wait returns true without waiting for new data even if remaining_time == 0 - if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_READ) && remaining_time != 0) + if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_READ)) return true; else throw Poco::TimeoutException(); @@ -486,13 +495,15 @@ bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time) case SSL_ERROR_WANT_WRITE: if (_pSocket->getBlocking()) { - /// The same as for SSL_ERROR_WANT_READ - if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_WRITE) && remaining_time != 0) + if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_WRITE)) return true; else throw Poco::TimeoutException(); } break; + /// NOTE: POCO_EINTR is the same as SSL_ERROR_WANT_READ (at least in + /// OpenSSL), so this likely dead code, but let's leave it for + /// compatibility with other implementations case SSL_ERROR_SYSCALL: return socketError == POCO_EAGAIN || socketError == POCO_EINTR; default: diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 8e48fc9b9d8..17d8dd42a2c 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -42,9 +42,19 @@ endif () # But use 2 parallel jobs, since: # - this is what llvm does # - and I've verfied that lld-11 does not use all available CPU time (in peak) while linking one binary -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLEL_LINK_JOBS GREATER 2) - message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") - set (PARALLEL_LINK_JOBS 2) +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO) + if (ARCH_AARCH64) + # aarch64 builds start to often fail with OOMs (reason not yet clear), for now let's limit the concurrency + message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 1.") + set (PARALLEL_LINK_JOBS 1) + if (LINKER_NAME MATCHES "lld") + math(EXPR LTO_JOBS ${NUMBER_OF_LOGICAL_CORES}/4) + set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -Wl,--thinlto-jobs=${LTO_JOBS}") + endif() + elseif (PARALLEL_LINK_JOBS GREATER 2) + message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") + set (PARALLEL_LINK_JOBS 2) + endif () endif() message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).") diff --git a/cmake/target.cmake b/cmake/target.cmake index d6c497955f6..3d0ecd032f9 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -84,5 +84,5 @@ if (CMAKE_CROSSCOMPILING) message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!") endif () - message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}") + message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILER_TARGET}") endif () diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 08f58335d16..90ae5981a21 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -228,6 +228,8 @@ add_contrib (ulid-c-cmake ulid-c) add_contrib (libssh-cmake libssh) +add_contrib (prometheus-protobufs-cmake prometheus-protobufs prometheus-protobufs-gogo) + # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually, diff --git a/contrib/avro b/contrib/avro index d43acc84d3d..545e7002683 160000 --- a/contrib/avro +++ b/contrib/avro @@ -1 +1 @@ -Subproject commit d43acc84d3d455b016f847d6666fbc3cd27f16a9 +Subproject commit 545e7002683cbc2198164d93088ac8e4955b4628 diff --git a/contrib/aws-cmake/CMakeLists.txt b/contrib/aws-cmake/CMakeLists.txt index abde20addaf..250b47b7c2c 100644 --- a/contrib/aws-cmake/CMakeLists.txt +++ b/contrib/aws-cmake/CMakeLists.txt @@ -125,7 +125,7 @@ configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in" "${CMAKE_CURRENT_BINARY_DIR}/include/aws/core/SDKConfig.h" @ONLY) aws_get_version(AWS_CRT_CPP_VERSION_MAJOR AWS_CRT_CPP_VERSION_MINOR AWS_CRT_CPP_VERSION_PATCH FULL_VERSION GIT_HASH) -configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${AWS_CRT_DIR}/include/aws/crt/Config.h" @ONLY) +configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/include/aws/crt/Config.h" @ONLY) list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC}) diff --git a/contrib/azure b/contrib/azure index 6262a76ef4c..ea3e19a7be0 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 6262a76ef4c4c330c84e58dd4f6f13f4e6230fcd +Subproject commit ea3e19a7be08519134c643177d56c7484dfec884 diff --git a/contrib/google-protobuf-cmake/protobuf_generate.cmake b/contrib/google-protobuf-cmake/protobuf_generate.cmake index 3e30b4e40fd..0731a81aeb8 100644 --- a/contrib/google-protobuf-cmake/protobuf_generate.cmake +++ b/contrib/google-protobuf-cmake/protobuf_generate.cmake @@ -157,15 +157,13 @@ function(protobuf_generate) set(_generated_srcs_all) foreach(_proto ${protobuf_generate_PROTOS}) - get_filename_component(_abs_file ${_proto} ABSOLUTE) - get_filename_component(_abs_dir ${_abs_file} DIRECTORY) - get_filename_component(_basename ${_proto} NAME_WE) - file(RELATIVE_PATH _rel_dir ${CMAKE_CURRENT_SOURCE_DIR} ${_abs_dir}) - - set(_possible_rel_dir) - if (NOT protobuf_generate_APPEND_PATH) - set(_possible_rel_dir ${_rel_dir}/) - endif() + # The protobuf compiler doesn't return paths to the files it generates so we have to calculate those paths here: + # _abs_file - absolute path to a .proto file, + # _possible_rel_dir - relative path to the .proto file from some import directory specified in Protobuf_IMPORT_DIRS, + # _basename - filename of the .proto file (without path and without extenstion). + get_proto_absolute_path(_abs_file "${_proto}" ${_protobuf_include_path}) + get_proto_relative_path(_possible_rel_dir "${_abs_file}" ${_protobuf_include_path}) + get_filename_component(_basename "${_abs_file}" NAME_WE) set(_generated_srcs) foreach(_ext ${protobuf_generate_GENERATE_EXTENSIONS}) @@ -173,7 +171,7 @@ function(protobuf_generate) endforeach() if(protobuf_generate_DESCRIPTORS AND protobuf_generate_LANGUAGE STREQUAL cpp) - set(_descriptor_file "${CMAKE_CURRENT_BINARY_DIR}/${_basename}.desc") + set(_descriptor_file "${protobuf_generate_PROTOC_OUT_DIR}/${_possible_rel_dir}${_basename}.desc") set(_dll_desc_out "--descriptor_set_out=${_descriptor_file}") list(APPEND _generated_srcs ${_descriptor_file}) endif() @@ -196,3 +194,36 @@ function(protobuf_generate) target_sources(${protobuf_generate_TARGET} PRIVATE ${_generated_srcs_all}) endif() endfunction() + +# Calculates the absolute path to a .proto file. +function(get_proto_absolute_path result proto) + cmake_path(IS_ABSOLUTE proto _is_abs_path) + if(_is_abs_path) + set(${result} "${proto}" PARENT_SCOPE) + return() + endif() + foreach(_include_dir ${ARGN}) + if(EXISTS "${_include_dir}/${proto}") + set(${result} "${_include_dir}/${proto}" PARENT_SCOPE) + return() + endif() + endforeach() + message(SEND_ERROR "Not found protobuf ${proto} in Protobuf_IMPORT_DIRS: ${ARGN}") +endfunction() + +# Calculates a relative path to a .proto file. The returned path is relative to one of include directories. +function(get_proto_relative_path result abs_path) + set(${result} "" PARENT_SCOPE) + get_filename_component(_abs_dir "${abs_path}" DIRECTORY) + foreach(_include_dir ${ARGN}) + cmake_path(IS_PREFIX _include_dir "${_abs_dir}" _is_prefix) + if(_is_prefix) + file(RELATIVE_PATH _rel_dir "${_include_dir}" "${_abs_dir}") + if(NOT _rel_dir STREQUAL "") + set(${result} "${_rel_dir}/" PARENT_SCOPE) + endif() + return() + endif() + endforeach() + message(WARNING "Not found protobuf ${abs_path} in Protobuf_IMPORT_DIRS: ${ARGN}") +endfunction() diff --git a/contrib/grpc b/contrib/grpc index 77b2737a709..f5b7fdc2dff 160000 --- a/contrib/grpc +++ b/contrib/grpc @@ -1 +1 @@ -Subproject commit 77b2737a709d43d8c6895e3f03ca62b00bd9201c +Subproject commit f5b7fdc2dff09ada06dbf6c75df298fb40f898df diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index a54bd8c1de2..0a650f2bcc0 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -5,7 +5,7 @@ else () endif () if (NOT ENABLE_ICU) - message(STATUS "Not using icu") + message(STATUS "Not using ICU") return() endif() diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 023fdcf103a..191886981c3 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -175,12 +175,19 @@ endif () target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1) -# jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++. -# The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`. -# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracking. +# jemalloc provides support two unwind flavors: +# - JEMALLOC_PROF_LIBUNWIND - unw_backtrace() - gnu libunwind (compatible with llvm libunwind) +# - JEMALLOC_PROF_LIBGCC - _Unwind_Backtrace() - the original HP libunwind and the one coming with gcc / g++ / libstdc++. # -# ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1). -target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1) +# But for JEMALLOC_PROF_LIBGCC it also calls _Unwind_Backtrace() during +# bootstraping of jemalloc, which may lead to deadlock, if the dlsym will do +# allocations somewhere (like glibc does prio 2.34, see [1]). +# +# [1]: https://sourceware.org/git/?p=glibc.git;a=commit;h=fada9018199c21c469ff0e731ef75c6020074ac9 +# +# And since ClickHouse unwind already supports unw_backtrace() we can safely +# switch to it to avoid this deadlock. +target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBUNWIND=1) target_link_libraries (_jemalloc PRIVATE unwind) # for RTLD_NEXT diff --git a/contrib/jemalloc-cmake/README b/contrib/jemalloc-cmake/README index 91b58448c1f..89d7c818445 100644 --- a/contrib/jemalloc-cmake/README +++ b/contrib/jemalloc-cmake/README @@ -4,3 +4,14 @@ It allows to integrate JEMalloc into CMake project. - Added JEMALLOC_CONFIG_MALLOC_CONF substitution - Add musl support (USE_MUSL) - Also note, that darwin build requires JEMALLOC_PREFIX, while others do not +- JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE should be disabled + + CLOCK_MONOTONIC_COARSE can go backwards after clock_adjtime(ADJ_FREQUENCY) + Let's disable it for now, and this menas that CLOCK_MONOTONIC will be used, + and this, should not be a problem, since: + - jemalloc do not call clock_gettime() that frequently + - the difference is CLOCK_MONOTONIC 20ns and CLOCK_MONOTONIC_COARSE 4ns + + This can be done with the following command: + + gg JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE | cut -d: -f1 | xargs sed -i 's@#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE@/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */@' diff --git a/contrib/jemalloc-cmake/include_freebsd_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_freebsd_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in index e027ee97d1a..b58d5c1eb8e 100644 --- a/contrib/jemalloc-cmake/include_freebsd_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_freebsd_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in @@ -96,7 +96,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in index 597f2d59933..8bfc4b60f3d 100644 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -98,7 +98,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in index 0a72e71496b..d5197c6127f 100644 --- a/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in @@ -98,7 +98,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in index 597f2d59933..8bfc4b60f3d 100644 --- a/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -98,7 +98,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in index 531f2bca0c2..b3f3a489a83 100644 --- a/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in @@ -96,7 +96,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in index d21098a4dcc..cf9acdb0b80 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -98,7 +98,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in index e08a2bed2ec..729bd82c2fa 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in @@ -99,7 +99,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/libpq-cmake/CMakeLists.txt b/contrib/libpq-cmake/CMakeLists.txt index 6a0012c01bf..246e19593f6 100644 --- a/contrib/libpq-cmake/CMakeLists.txt +++ b/contrib/libpq-cmake/CMakeLists.txt @@ -54,7 +54,6 @@ set(SRCS "${LIBPQ_SOURCE_DIR}/port/pgstrcasecmp.c" "${LIBPQ_SOURCE_DIR}/port/thread.c" "${LIBPQ_SOURCE_DIR}/port/path.c" - "${LIBPQ_SOURCE_DIR}/port/explicit_bzero.c" ) add_library(_libpq ${SRCS}) diff --git a/contrib/llvm-project b/contrib/llvm-project index d2142eed980..2a8967b60cb 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit d2142eed98046a47ff7112e3cc1e197c8a5cd80f +Subproject commit 2a8967b60cbe5bc2df253712bac343cc5263c5fc diff --git a/contrib/openssl b/contrib/openssl index 5d81fa7068f..66deddc1e53 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit 5d81fa7068fc8c07f4d0997d5b703f3c541a637c +Subproject commit 66deddc1e53cda8706604a019777259372d1bd62 diff --git a/contrib/orc b/contrib/orc index 947cebaf943..bcc025c0982 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit 947cebaf9432d708253ac08dc3012daa6b4ede6f +Subproject commit bcc025c09828c556f54cfbdf83a66b9acae7d17f diff --git a/contrib/pocketfft b/contrib/pocketfft index 9efd4da52cf..f4c1aa8aa9c 160000 --- a/contrib/pocketfft +++ b/contrib/pocketfft @@ -1 +1 @@ -Subproject commit 9efd4da52cf8d28d14531d14e43ad9d913807546 +Subproject commit f4c1aa8aa9ce79ad39e80f2c9c41b92ead90fda3 diff --git a/contrib/prometheus-protobufs-cmake/CMakeLists.txt b/contrib/prometheus-protobufs-cmake/CMakeLists.txt new file mode 100644 index 00000000000..8c939902be7 --- /dev/null +++ b/contrib/prometheus-protobufs-cmake/CMakeLists.txt @@ -0,0 +1,34 @@ +option(ENABLE_PROMETHEUS_PROTOBUFS "Enable Prometheus Protobufs" ${ENABLE_PROTOBUF}) + +if(NOT ENABLE_PROMETHEUS_PROTOBUFS) + message(STATUS "Not using prometheus-protobufs") + return() +endif() + +set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src") +set(Prometheus_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/prometheus-protobufs") +set(GogoProto_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/prometheus-protobufs-gogo") + +# Protobuf_IMPORT_DIRS specify where the protobuf compiler will look for .proto files. +set(Old_Protobuf_IMPORT_DIRS ${Protobuf_IMPORT_DIRS}) +list(APPEND Protobuf_IMPORT_DIRS "${Protobuf_INCLUDE_DIR}" "${Prometheus_INCLUDE_DIR}" "${GogoProto_INCLUDE_DIR}") + +PROTOBUF_GENERATE_CPP(prometheus_protobufs_sources prometheus_protobufs_headers + "prompb/remote.proto" + "prompb/types.proto" + "gogoproto/gogo.proto" +) + +set(Protobuf_IMPORT_DIRS ${Old_Protobuf_IMPORT_DIRS}) + +# Ignore warnings while compiling protobuf-generated *.pb.h and *.pb.cpp files. +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") + +# Disable clang-tidy for protobuf-generated *.pb.h and *.pb.cpp files. +set (CMAKE_CXX_CLANG_TIDY "") + +add_library(_prometheus_protobufs ${prometheus_protobufs_sources} ${prometheus_protobufs_headers}) +target_include_directories(_prometheus_protobufs SYSTEM PUBLIC "${CMAKE_CURRENT_BINARY_DIR}") +target_link_libraries (_prometheus_protobufs PUBLIC ch_contrib::protobuf) + +add_library (ch_contrib::prometheus_protobufs ALIAS _prometheus_protobufs) diff --git a/contrib/prometheus-protobufs-gogo/LICENSE b/contrib/prometheus-protobufs-gogo/LICENSE new file mode 100644 index 00000000000..16be18e5c50 --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/LICENSE @@ -0,0 +1,35 @@ +Copyright (c) 2022, The Cosmos SDK Authors. All rights reserved. +Copyright (c) 2013, The GoGo Authors. All rights reserved. + +Protocol Buffers for Go with Gadgets + +Go support for Protocol Buffers - Google's data interchange format + +Copyright 2010 The Go Authors. All rights reserved. +https://github.com/golang/protobuf + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/contrib/prometheus-protobufs-gogo/README b/contrib/prometheus-protobufs-gogo/README new file mode 100644 index 00000000000..c40bc42df66 --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/README @@ -0,0 +1,4 @@ +File "gogoproto/gogo.proto" was downloaded from the "Protocol Buffers for Go with Gadgets" project: +https://github.com/cosmos/gogoproto/blob/main/gogoproto/gogo.proto + +File "gogoproto/gogo.proto" is used in ClickHouse to compile prometheus protobufs. diff --git a/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto b/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto new file mode 100644 index 00000000000..974b36a7ccd --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto @@ -0,0 +1,145 @@ +// Protocol Buffers for Go with Gadgets +// +// Copyright (c) 2013, The GoGo Authors. All rights reserved. +// http://github.com/cosmos/gogoproto +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +syntax = "proto2"; +package gogoproto; + +import "google/protobuf/descriptor.proto"; + +option java_package = "com.google.protobuf"; +option java_outer_classname = "GoGoProtos"; +option go_package = "github.com/cosmos/gogoproto/gogoproto"; + +extend google.protobuf.EnumOptions { + optional bool goproto_enum_prefix = 62001; + optional bool goproto_enum_stringer = 62021; + optional bool enum_stringer = 62022; + optional string enum_customname = 62023; + optional bool enumdecl = 62024; +} + +extend google.protobuf.EnumValueOptions { + optional string enumvalue_customname = 66001; +} + +extend google.protobuf.FileOptions { + optional bool goproto_getters_all = 63001; + optional bool goproto_enum_prefix_all = 63002; + optional bool goproto_stringer_all = 63003; + optional bool verbose_equal_all = 63004; + optional bool face_all = 63005; + optional bool gostring_all = 63006; + optional bool populate_all = 63007; + optional bool stringer_all = 63008; + optional bool onlyone_all = 63009; + + optional bool equal_all = 63013; + optional bool description_all = 63014; + optional bool testgen_all = 63015; + optional bool benchgen_all = 63016; + optional bool marshaler_all = 63017; + optional bool unmarshaler_all = 63018; + optional bool stable_marshaler_all = 63019; + + optional bool sizer_all = 63020; + + optional bool goproto_enum_stringer_all = 63021; + optional bool enum_stringer_all = 63022; + + optional bool unsafe_marshaler_all = 63023; + optional bool unsafe_unmarshaler_all = 63024; + + optional bool goproto_extensions_map_all = 63025; + optional bool goproto_unrecognized_all = 63026; + optional bool gogoproto_import = 63027; + optional bool protosizer_all = 63028; + optional bool compare_all = 63029; + optional bool typedecl_all = 63030; + optional bool enumdecl_all = 63031; + + optional bool goproto_registration = 63032; + optional bool messagename_all = 63033; + + optional bool goproto_sizecache_all = 63034; + optional bool goproto_unkeyed_all = 63035; +} + +extend google.protobuf.MessageOptions { + optional bool goproto_getters = 64001; + optional bool goproto_stringer = 64003; + optional bool verbose_equal = 64004; + optional bool face = 64005; + optional bool gostring = 64006; + optional bool populate = 64007; + optional bool stringer = 67008; + optional bool onlyone = 64009; + + optional bool equal = 64013; + optional bool description = 64014; + optional bool testgen = 64015; + optional bool benchgen = 64016; + optional bool marshaler = 64017; + optional bool unmarshaler = 64018; + optional bool stable_marshaler = 64019; + + optional bool sizer = 64020; + + optional bool unsafe_marshaler = 64023; + optional bool unsafe_unmarshaler = 64024; + + optional bool goproto_extensions_map = 64025; + optional bool goproto_unrecognized = 64026; + + optional bool protosizer = 64028; + optional bool compare = 64029; + + optional bool typedecl = 64030; + + optional bool messagename = 64033; + + optional bool goproto_sizecache = 64034; + optional bool goproto_unkeyed = 64035; +} + +extend google.protobuf.FieldOptions { + optional bool nullable = 65001; + optional bool embed = 65002; + optional string customtype = 65003; + optional string customname = 65004; + optional string jsontag = 65005; + optional string moretags = 65006; + optional string casttype = 65007; + optional string castkey = 65008; + optional string castvalue = 65009; + + optional bool stdtime = 65010; + optional bool stdduration = 65011; + optional bool wktpointer = 65012; + + optional string castrepeated = 65013; +} diff --git a/contrib/prometheus-protobufs/LICENSE b/contrib/prometheus-protobufs/LICENSE new file mode 100644 index 00000000000..261eeb9e9f8 --- /dev/null +++ b/contrib/prometheus-protobufs/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/contrib/prometheus-protobufs/README b/contrib/prometheus-protobufs/README new file mode 100644 index 00000000000..c557e59bb93 --- /dev/null +++ b/contrib/prometheus-protobufs/README @@ -0,0 +1,2 @@ +Files "prompb/remote.proto" and "prompb/types.proto" were downloaded from the Prometheus repository: +https://github.com/prometheus/prometheus/tree/main/prompb diff --git a/contrib/prometheus-protobufs/prompb/remote.proto b/contrib/prometheus-protobufs/prompb/remote.proto new file mode 100644 index 00000000000..50bb25e7fac --- /dev/null +++ b/contrib/prometheus-protobufs/prompb/remote.proto @@ -0,0 +1,88 @@ +// Copyright 2016 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +package prometheus; + +option go_package = "prompb"; + +import "prompb/types.proto"; +import "gogoproto/gogo.proto"; + +message WriteRequest { + repeated prometheus.TimeSeries timeseries = 1 [(gogoproto.nullable) = false]; + // Cortex uses this field to determine the source of the write request. + // We reserve it to avoid any compatibility issues. + reserved 2; + repeated prometheus.MetricMetadata metadata = 3 [(gogoproto.nullable) = false]; +} + +// ReadRequest represents a remote read request. +message ReadRequest { + repeated Query queries = 1; + + enum ResponseType { + // Server will return a single ReadResponse message with matched series that includes list of raw samples. + // It's recommended to use streamed response types instead. + // + // Response headers: + // Content-Type: "application/x-protobuf" + // Content-Encoding: "snappy" + SAMPLES = 0; + // Server will stream a delimited ChunkedReadResponse message that + // contains XOR or HISTOGRAM(!) encoded chunks for a single series. + // Each message is following varint size and fixed size bigendian + // uint32 for CRC32 Castagnoli checksum. + // + // Response headers: + // Content-Type: "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse" + // Content-Encoding: "" + STREAMED_XOR_CHUNKS = 1; + } + + // accepted_response_types allows negotiating the content type of the response. + // + // Response types are taken from the list in the FIFO order. If no response type in `accepted_response_types` is + // implemented by server, error is returned. + // For request that do not contain `accepted_response_types` field the SAMPLES response type will be used. + repeated ResponseType accepted_response_types = 2; +} + +// ReadResponse is a response when response_type equals SAMPLES. +message ReadResponse { + // In same order as the request's queries. + repeated QueryResult results = 1; +} + +message Query { + int64 start_timestamp_ms = 1; + int64 end_timestamp_ms = 2; + repeated prometheus.LabelMatcher matchers = 3; + prometheus.ReadHints hints = 4; +} + +message QueryResult { + // Samples within a time series must be ordered by time. + repeated prometheus.TimeSeries timeseries = 1; +} + +// ChunkedReadResponse is a response when response_type equals STREAMED_XOR_CHUNKS. +// We strictly stream full series after series, optionally split by time. This means that a single frame can contain +// partition of the single series, but once a new series is started to be streamed it means that no more chunks will +// be sent for previous one. Series are returned sorted in the same way TSDB block are internally. +message ChunkedReadResponse { + repeated prometheus.ChunkedSeries chunked_series = 1; + + // query_index represents an index of the query from ReadRequest.queries these chunks relates to. + int64 query_index = 2; +} diff --git a/contrib/prometheus-protobufs/prompb/types.proto b/contrib/prometheus-protobufs/prompb/types.proto new file mode 100644 index 00000000000..61fc1e0143e --- /dev/null +++ b/contrib/prometheus-protobufs/prompb/types.proto @@ -0,0 +1,187 @@ +// Copyright 2017 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +package prometheus; + +option go_package = "prompb"; + +import "gogoproto/gogo.proto"; + +message MetricMetadata { + enum MetricType { + UNKNOWN = 0; + COUNTER = 1; + GAUGE = 2; + HISTOGRAM = 3; + GAUGEHISTOGRAM = 4; + SUMMARY = 5; + INFO = 6; + STATESET = 7; + } + + // Represents the metric type, these match the set from Prometheus. + // Refer to github.com/prometheus/common/model/metadata.go for details. + MetricType type = 1; + string metric_family_name = 2; + string help = 4; + string unit = 5; +} + +message Sample { + double value = 1; + // timestamp is in ms format, see model/timestamp/timestamp.go for + // conversion from time.Time to Prometheus timestamp. + int64 timestamp = 2; +} + +message Exemplar { + // Optional, can be empty. + repeated Label labels = 1 [(gogoproto.nullable) = false]; + double value = 2; + // timestamp is in ms format, see model/timestamp/timestamp.go for + // conversion from time.Time to Prometheus timestamp. + int64 timestamp = 3; +} + +// A native histogram, also known as a sparse histogram. +// Original design doc: +// https://docs.google.com/document/d/1cLNv3aufPZb3fNfaJgdaRBZsInZKKIHo9E6HinJVbpM/edit +// The appendix of this design doc also explains the concept of float +// histograms. This Histogram message can represent both, the usual +// integer histogram as well as a float histogram. +message Histogram { + enum ResetHint { + UNKNOWN = 0; // Need to test for a counter reset explicitly. + YES = 1; // This is the 1st histogram after a counter reset. + NO = 2; // There was no counter reset between this and the previous Histogram. + GAUGE = 3; // This is a gauge histogram where counter resets don't happen. + } + + oneof count { // Count of observations in the histogram. + uint64 count_int = 1; + double count_float = 2; + } + double sum = 3; // Sum of observations in the histogram. + // The schema defines the bucket schema. Currently, valid numbers + // are -4 <= n <= 8. They are all for base-2 bucket schemas, where 1 + // is a bucket boundary in each case, and then each power of two is + // divided into 2^n logarithmic buckets. Or in other words, each + // bucket boundary is the previous boundary times 2^(2^-n). In the + // future, more bucket schemas may be added using numbers < -4 or > + // 8. + sint32 schema = 4; + double zero_threshold = 5; // Breadth of the zero bucket. + oneof zero_count { // Count in zero bucket. + uint64 zero_count_int = 6; + double zero_count_float = 7; + } + + // Negative Buckets. + repeated BucketSpan negative_spans = 8 [(gogoproto.nullable) = false]; + // Use either "negative_deltas" or "negative_counts", the former for + // regular histograms with integer counts, the latter for float + // histograms. + repeated sint64 negative_deltas = 9; // Count delta of each bucket compared to previous one (or to zero for 1st bucket). + repeated double negative_counts = 10; // Absolute count of each bucket. + + // Positive Buckets. + repeated BucketSpan positive_spans = 11 [(gogoproto.nullable) = false]; + // Use either "positive_deltas" or "positive_counts", the former for + // regular histograms with integer counts, the latter for float + // histograms. + repeated sint64 positive_deltas = 12; // Count delta of each bucket compared to previous one (or to zero for 1st bucket). + repeated double positive_counts = 13; // Absolute count of each bucket. + + ResetHint reset_hint = 14; + // timestamp is in ms format, see model/timestamp/timestamp.go for + // conversion from time.Time to Prometheus timestamp. + int64 timestamp = 15; +} + +// A BucketSpan defines a number of consecutive buckets with their +// offset. Logically, it would be more straightforward to include the +// bucket counts in the Span. However, the protobuf representation is +// more compact in the way the data is structured here (with all the +// buckets in a single array separate from the Spans). +message BucketSpan { + sint32 offset = 1; // Gap to previous span, or starting point for 1st span (which can be negative). + uint32 length = 2; // Length of consecutive buckets. +} + +// TimeSeries represents samples and labels for a single time series. +message TimeSeries { + // For a timeseries to be valid, and for the samples and exemplars + // to be ingested by the remote system properly, the labels field is required. + repeated Label labels = 1 [(gogoproto.nullable) = false]; + repeated Sample samples = 2 [(gogoproto.nullable) = false]; + repeated Exemplar exemplars = 3 [(gogoproto.nullable) = false]; + repeated Histogram histograms = 4 [(gogoproto.nullable) = false]; +} + +message Label { + string name = 1; + string value = 2; +} + +message Labels { + repeated Label labels = 1 [(gogoproto.nullable) = false]; +} + +// Matcher specifies a rule, which can match or set of labels or not. +message LabelMatcher { + enum Type { + EQ = 0; + NEQ = 1; + RE = 2; + NRE = 3; + } + Type type = 1; + string name = 2; + string value = 3; +} + +message ReadHints { + int64 step_ms = 1; // Query step size in milliseconds. + string func = 2; // String representation of surrounding function or aggregation. + int64 start_ms = 3; // Start time in milliseconds. + int64 end_ms = 4; // End time in milliseconds. + repeated string grouping = 5; // List of label names used in aggregation. + bool by = 6; // Indicate whether it is without or by. + int64 range_ms = 7; // Range vector selector range in milliseconds. +} + +// Chunk represents a TSDB chunk. +// Time range [min, max] is inclusive. +message Chunk { + int64 min_time_ms = 1; + int64 max_time_ms = 2; + + // We require this to match chunkenc.Encoding. + enum Encoding { + UNKNOWN = 0; + XOR = 1; + HISTOGRAM = 2; + FLOAT_HISTOGRAM = 3; + } + Encoding type = 3; + bytes data = 4; +} + +// ChunkedSeries represents single, encoded time series. +message ChunkedSeries { + // Labels should be sorted. + repeated Label labels = 1 [(gogoproto.nullable) = false]; + // Chunks will be in start time order and may overlap. + repeated Chunk chunks = 2 [(gogoproto.nullable) = false]; +} diff --git a/contrib/rocksdb b/contrib/rocksdb index 3a0b80ca9d6..be366233921 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 3a0b80ca9d6eebb38fad7ea3f41dfc9db4f6a984 +Subproject commit be366233921293bd07a84dc4ea6991858665f202 diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index c4220ba90ac..3a14407166c 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -1,24 +1,17 @@ -option (ENABLE_ROCKSDB "Enable rocksdb library" ${ENABLE_LIBRARIES}) +option (ENABLE_ROCKSDB "Enable RocksDB" ${ENABLE_LIBRARIES}) if (NOT ENABLE_ROCKSDB) - message (STATUS "Not using rocksdb") + message (STATUS "Not using RocksDB") return() endif() -## this file is extracted from `contrib/rocksdb/CMakeLists.txt` -set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb") -list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/") - -set(PORTABLE ON) -## always disable jemalloc for rocksdb by default -## because it introduces non-standard jemalloc APIs +# Always disable jemalloc for rocksdb by default because it introduces non-standard jemalloc APIs option(WITH_JEMALLOC "build with JeMalloc" OFF) -set(USE_SNAPPY OFF) -if (TARGET ch_contrib::snappy) - set(USE_SNAPPY ON) -endif() -option(WITH_SNAPPY "build with SNAPPY" ${USE_SNAPPY}) -## lz4, zlib, zstd is enabled in ClickHouse by default + +option(WITH_LIBURING "build with liburing" OFF) # TODO could try to enable this conditionally, depending on ClickHouse's ENABLE_LIBURING + +# ClickHouse cannot be compiled without snappy, lz4, zlib, zstd +option(WITH_SNAPPY "build with SNAPPY" ON) option(WITH_LZ4 "build with lz4" ON) option(WITH_ZLIB "build with zlib" ON) option(WITH_ZSTD "build with zstd" ON) @@ -26,78 +19,46 @@ option(WITH_ZSTD "build with zstd" ON) # third-party/folly is only validated to work on Linux and Windows for now. # So only turn it on there by default. if(CMAKE_SYSTEM_NAME MATCHES "Linux|Windows") - if(MSVC AND MSVC_VERSION LESS 1910) - # Folly does not compile with MSVC older than VS2017 - option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF) - else() - option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON) - endif() + option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON) else() option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF) endif() -if( NOT DEFINED CMAKE_CXX_STANDARD ) - set(CMAKE_CXX_STANDARD 11) +if(WITH_SNAPPY) + add_definitions(-DSNAPPY) + list(APPEND THIRDPARTY_LIBS ch_contrib::snappy) endif() -if(MSVC) - option(WITH_XPRESS "build with windows built in compression" OFF) - include("${ROCKSDB_SOURCE_DIR}/thirdparty.inc") -else() - if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND NOT CMAKE_SYSTEM_NAME MATCHES "kFreeBSD") - # FreeBSD has jemalloc as default malloc - # but it does not have all the jemalloc files in include/... - set(WITH_JEMALLOC ON) - else() - if(WITH_JEMALLOC AND TARGET ch_contrib::jemalloc) - add_definitions(-DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE) - list(APPEND THIRDPARTY_LIBS ch_contrib::jemalloc) - endif() - endif() - - if(WITH_SNAPPY) - add_definitions(-DSNAPPY) - list(APPEND THIRDPARTY_LIBS ch_contrib::snappy) - endif() - - if(WITH_ZLIB) - add_definitions(-DZLIB) - list(APPEND THIRDPARTY_LIBS ch_contrib::zlib) - endif() - - if(WITH_LZ4) - add_definitions(-DLZ4) - list(APPEND THIRDPARTY_LIBS ch_contrib::lz4) - endif() - - if(WITH_ZSTD) - add_definitions(-DZSTD) - list(APPEND THIRDPARTY_LIBS ch_contrib::zstd) - endif() +if(WITH_ZLIB) + add_definitions(-DZLIB) + list(APPEND THIRDPARTY_LIBS ch_contrib::zlib) endif() -if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") - if(POWER9) - set(HAS_POWER9 1) - set(HAS_ALTIVEC 1) - else() - set(HAS_POWER8 1) - set(HAS_ALTIVEC 1) - endif(POWER9) -endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") +if(WITH_LZ4) + add_definitions(-DLZ4) + list(APPEND THIRDPARTY_LIBS ch_contrib::lz4) +endif() -if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64") - set(HAS_ARMV8_CRC 1) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") -endif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64") +if(WITH_ZSTD) + add_definitions(-DZSTD) + list(APPEND THIRDPARTY_LIBS ch_contrib::zstd) +endif() +option(PORTABLE "build a portable binary" ON) -if(ENABLE_AVX2 AND ENABLE_PCLMULQDQ) +if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ) add_definitions(-DHAVE_SSE42) add_definitions(-DHAVE_PCLMUL) endif() +if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64") + set (HAS_ARMV8_CRC 1) + # the original build descriptions set specific flags for ARM. These flags are already subsumed by ClickHouse's general + # ARM flags, see cmake/cpu_features.cmake + # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") + # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") +endif() + set (HAVE_THREAD_LOCAL 1) if(HAVE_THREAD_LOCAL) add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL) @@ -107,8 +68,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin") add_definitions(-DOS_MACOSX) elseif(CMAKE_SYSTEM_NAME MATCHES "Linux") add_definitions(-DOS_LINUX) -elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS") - add_definitions(-DOS_SOLARIS) elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") add_definitions(-DOS_FREEBSD) elseif(CMAKE_SYSTEM_NAME MATCHES "Android") @@ -123,12 +82,10 @@ endif() if (OS_LINUX) add_definitions(-DROCKSDB_SCHED_GETCPU_PRESENT) - add_definitions(-DROCKSDB_AUXV_SYSAUXV_PRESENT) add_definitions(-DROCKSDB_AUXV_GETAUXVAL_PRESENT) -elseif (OS_FREEBSD) - add_definitions(-DROCKSDB_AUXV_SYSAUXV_PRESENT) endif() +set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb") include_directories(${ROCKSDB_SOURCE_DIR}) include_directories("${ROCKSDB_SOURCE_DIR}/include") @@ -136,11 +93,11 @@ if(WITH_FOLLY_DISTRIBUTED_MUTEX) include_directories("${ROCKSDB_SOURCE_DIR}/third-party/folly") endif() -# Main library source code - set(SOURCES ${ROCKSDB_SOURCE_DIR}/cache/cache.cc ${ROCKSDB_SOURCE_DIR}/cache/cache_entry_roles.cc + ${ROCKSDB_SOURCE_DIR}/cache/cache_key.cc + ${ROCKSDB_SOURCE_DIR}/cache/cache_reservation_manager.cc ${ROCKSDB_SOURCE_DIR}/cache/clock_cache.cc ${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc ${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc @@ -156,6 +113,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/prefetch_buffer_collection.cc ${ROCKSDB_SOURCE_DIR}/db/builder.cc ${ROCKSDB_SOURCE_DIR}/db/c.cc ${ROCKSDB_SOURCE_DIR}/db/column_family.cc @@ -229,6 +187,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc ${ROCKSDB_SOURCE_DIR}/env/fs_remap.cc ${ROCKSDB_SOURCE_DIR}/env/mock_env.cc + ${ROCKSDB_SOURCE_DIR}/env/unique_id_gen.cc ${ROCKSDB_SOURCE_DIR}/file/delete_scheduler.cc ${ROCKSDB_SOURCE_DIR}/file/file_prefetch_buffer.cc ${ROCKSDB_SOURCE_DIR}/file/file_util.cc @@ -247,6 +206,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/memory/concurrent_arena.cc ${ROCKSDB_SOURCE_DIR}/memory/jemalloc_nodump_allocator.cc ${ROCKSDB_SOURCE_DIR}/memory/memkind_kmem_allocator.cc + ${ROCKSDB_SOURCE_DIR}/memory/memory_allocator.cc ${ROCKSDB_SOURCE_DIR}/memtable/alloc_tracker.cc ${ROCKSDB_SOURCE_DIR}/memtable/hash_linklist_rep.cc ${ROCKSDB_SOURCE_DIR}/memtable/hash_skiplist_rep.cc @@ -322,6 +282,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/table/table_factory.cc ${ROCKSDB_SOURCE_DIR}/table/table_properties.cc ${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc + ${ROCKSDB_SOURCE_DIR}/table/unique_id.cc ${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc ${ROCKSDB_SOURCE_DIR}/test_util/sync_point_impl.cc ${ROCKSDB_SOURCE_DIR}/test_util/testutil.cc @@ -333,9 +294,12 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc ${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc ${ROCKSDB_SOURCE_DIR}/tools/trace_analyzer_tool.cc - ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc ${ROCKSDB_SOURCE_DIR}/trace_replay/block_cache_tracer.cc ${ROCKSDB_SOURCE_DIR}/trace_replay/io_tracer.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record_handler.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record_result.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc ${ROCKSDB_SOURCE_DIR}/util/coding.cc ${ROCKSDB_SOURCE_DIR}/util/compaction_job_stats_impl.cc ${ROCKSDB_SOURCE_DIR}/util/comparator.cc @@ -347,6 +311,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/util/murmurhash.cc ${ROCKSDB_SOURCE_DIR}/util/random.cc ${ROCKSDB_SOURCE_DIR}/util/rate_limiter.cc + ${ROCKSDB_SOURCE_DIR}/util/regex.cc ${ROCKSDB_SOURCE_DIR}/util/ribbon_config.cc ${ROCKSDB_SOURCE_DIR}/util/slice.cc ${ROCKSDB_SOURCE_DIR}/util/file_checksum_helper.cc @@ -362,18 +327,23 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl_filesnapshot.cc ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_dump_tool.cc ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_file.cc + ${ROCKSDB_SOURCE_DIR}/utilities/cache_dump_load.cc + ${ROCKSDB_SOURCE_DIR}/utilities/cache_dump_load_impl.cc ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/cassandra_compaction_filter.cc ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/format.cc ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/merge_operator.cc ${ROCKSDB_SOURCE_DIR}/utilities/checkpoint/checkpoint_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters.cc ${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc ${ROCKSDB_SOURCE_DIR}/utilities/debug.cc ${ROCKSDB_SOURCE_DIR}/utilities/env_mirror.cc ${ROCKSDB_SOURCE_DIR}/utilities/env_timed.cc ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_env.cc ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_fs.cc + ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_secondary_cache.cc ${ROCKSDB_SOURCE_DIR}/utilities/leveldb_options/leveldb_options.cc ${ROCKSDB_SOURCE_DIR}/utilities/memory/memory_util.cc + ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators.cc ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/bytesxor.cc ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/max.cc ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/put.cc @@ -393,6 +363,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/sim_cache.cc ${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc ${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc + ${ROCKSDB_SOURCE_DIR}/utilities/trace/replayer_impl.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/lock_manager.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_tracker.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_manager.cc @@ -411,6 +382,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn_db.cc ${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/wal_filter.cc ${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc ${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index_internal.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc @@ -425,7 +397,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc - rocksdb_build_version.cc) + build_version.cc) # generated by hand if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ) set_source_files_properties( @@ -462,5 +434,6 @@ endif() add_library(_rocksdb ${SOURCES}) add_library(ch_contrib::rocksdb ALIAS _rocksdb) target_link_libraries(_rocksdb PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) + # SYSTEM is required to overcome some issues target_include_directories(_rocksdb SYSTEM BEFORE INTERFACE "${ROCKSDB_SOURCE_DIR}/include") diff --git a/contrib/rocksdb-cmake/rocksdb_build_version.cc b/contrib/rocksdb-cmake/build_version.cc similarity index 100% rename from contrib/rocksdb-cmake/rocksdb_build_version.cc rename to contrib/rocksdb-cmake/build_version.cc diff --git a/contrib/s2geometry b/contrib/s2geometry index 0547c383717..6522a40338d 160000 --- a/contrib/s2geometry +++ b/contrib/s2geometry @@ -1 +1 @@ -Subproject commit 0547c38371777a1c1c8be263a6f05c3bf71bb05b +Subproject commit 6522a40338d58752c2a4227a3fc2bc4107c73e43 diff --git a/contrib/s2geometry-cmake/CMakeLists.txt b/contrib/s2geometry-cmake/CMakeLists.txt index 6632f9c27d5..48562b8cead 100644 --- a/contrib/s2geometry-cmake/CMakeLists.txt +++ b/contrib/s2geometry-cmake/CMakeLists.txt @@ -1,7 +1,7 @@ -option(ENABLE_S2_GEOMETRY "Enable S2 geometry library" ${ENABLE_LIBRARIES}) +option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES}) if (NOT ENABLE_S2_GEOMETRY) - message(STATUS "Not using S2 geometry") + message(STATUS "Not using S2 Geometry") return() endif() @@ -38,6 +38,7 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2cell_index.cc" "${S2_SOURCE_DIR}/s2/s2cell_union.cc" "${S2_SOURCE_DIR}/s2/s2centroids.cc" + "${S2_SOURCE_DIR}/s2/s2chain_interpolation_query.cc" "${S2_SOURCE_DIR}/s2/s2closest_cell_query.cc" "${S2_SOURCE_DIR}/s2/s2closest_edge_query.cc" "${S2_SOURCE_DIR}/s2/s2closest_point_query.cc" @@ -46,6 +47,7 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2coords.cc" "${S2_SOURCE_DIR}/s2/s2crossing_edge_query.cc" "${S2_SOURCE_DIR}/s2/s2debug.cc" + "${S2_SOURCE_DIR}/s2/s2density_tree.cc" "${S2_SOURCE_DIR}/s2/s2earth.cc" "${S2_SOURCE_DIR}/s2/s2edge_clipping.cc" "${S2_SOURCE_DIR}/s2/s2edge_crosser.cc" @@ -53,8 +55,10 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2edge_distances.cc" "${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc" "${S2_SOURCE_DIR}/s2/s2error.cc" + "${S2_SOURCE_DIR}/s2/s2fractal.cc" "${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc" "${S2_SOURCE_DIR}/s2/s2hausdorff_distance_query.cc" + "${S2_SOURCE_DIR}/s2/s2index_cell_data.cc" "${S2_SOURCE_DIR}/s2/s2latlng.cc" "${S2_SOURCE_DIR}/s2/s2latlng_rect.cc" "${S2_SOURCE_DIR}/s2/s2latlng_rect_bounder.cc" @@ -63,10 +67,10 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2lax_polyline_shape.cc" "${S2_SOURCE_DIR}/s2/s2loop.cc" "${S2_SOURCE_DIR}/s2/s2loop_measures.cc" + "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2measures.cc" "${S2_SOURCE_DIR}/s2/s2memory_tracker.cc" "${S2_SOURCE_DIR}/s2/s2metrics.cc" - "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2min_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2padded_cell.cc" "${S2_SOURCE_DIR}/s2/s2point_compression.cc" @@ -80,10 +84,11 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2predicates.cc" "${S2_SOURCE_DIR}/s2/s2projections.cc" "${S2_SOURCE_DIR}/s2/s2r2rect.cc" - "${S2_SOURCE_DIR}/s2/s2region.cc" - "${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc" + "${S2_SOURCE_DIR}/s2/s2random.cc" "${S2_SOURCE_DIR}/s2/s2region_coverer.cc" "${S2_SOURCE_DIR}/s2/s2region_intersection.cc" + "${S2_SOURCE_DIR}/s2/s2region_sharder.cc" + "${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc" "${S2_SOURCE_DIR}/s2/s2region_union.cc" "${S2_SOURCE_DIR}/s2/s2shape_index.cc" "${S2_SOURCE_DIR}/s2/s2shape_index_buffered_region.cc" @@ -94,9 +99,12 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_conversion.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_count_vertices.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_wrap.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc" + "${S2_SOURCE_DIR}/s2/s2testing.cc" "${S2_SOURCE_DIR}/s2/s2text_format.cc" "${S2_SOURCE_DIR}/s2/s2wedge_relations.cc" "${S2_SOURCE_DIR}/s2/s2winding_operation.cc" @@ -140,6 +148,7 @@ target_link_libraries(_s2 PRIVATE absl::strings absl::type_traits absl::utility + absl::vlog_is_on ) target_include_directories(_s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/") diff --git a/contrib/sysroot b/contrib/sysroot index 39c4713334f..cc385041b22 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit 39c4713334f9f156dbf508f548d510d9129a657c +Subproject commit cc385041b226d1fc28ead14dbab5d40a5f821dd8 diff --git a/contrib/vectorscan b/contrib/vectorscan index 38431d11178..d29730e1cb9 160000 --- a/contrib/vectorscan +++ b/contrib/vectorscan @@ -1 +1 @@ -Subproject commit 38431d111781843741a781a57a6381a527d900a4 +Subproject commit d29730e1cb9daaa66bda63426cdce83505d2c809 diff --git a/contrib/vectorscan-cmake/CMakeLists.txt b/contrib/vectorscan-cmake/CMakeLists.txt index d6c626c1612..35d5fd3dc82 100644 --- a/contrib/vectorscan-cmake/CMakeLists.txt +++ b/contrib/vectorscan-cmake/CMakeLists.txt @@ -1,11 +1,8 @@ -# We use vectorscan, a portable and API/ABI-compatible drop-in replacement for hyperscan. - +# Vectorscan is drop-in replacement for Hyperscan. if ((ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) OR ARCH_AARCH64) - option (ENABLE_VECTORSCAN "Enable vectorscan library" ${ENABLE_LIBRARIES}) + option (ENABLE_VECTORSCAN "Enable vectorscan" ${ENABLE_LIBRARIES}) endif() -# TODO PPC should generally work but needs manual generation of ppc/config.h file on a PPC machine - if (NOT ENABLE_VECTORSCAN) message (STATUS "Not using vectorscan") return() @@ -272,34 +269,24 @@ if (ARCH_AARCH64) ) endif() -# TODO -# if (ARCH_PPC64LE) -# list(APPEND SRCS -# "${LIBRARY_DIR}/src/util/supervector/arch/ppc64el/impl.cpp" -# ) -# endif() - add_library (_vectorscan ${SRCS}) -target_compile_options (_vectorscan PRIVATE - -fno-sanitize=undefined # assume the library takes care of itself - -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # options from original build system -) # library has too much debug information if (OMIT_HEAVY_DEBUG_SYMBOLS) target_compile_options (_vectorscan PRIVATE -g0) endif() -# Include version header manually generated by running the original build system -target_include_directories (_vectorscan SYSTEM PRIVATE common) +target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") + +# Makes the version header visible. It was generated by running the native build system manually. +# Please update whenever you update vectorscan. +target_include_directories (_vectorscan SYSTEM PUBLIC common) # vectorscan inherited some patched in-source versions of boost headers to fix a bug in # boost 1.69. This bug has been solved long ago but vectorscan's source code still # points to the patched versions, so include it here. target_include_directories (_vectorscan SYSTEM PRIVATE "${LIBRARY_DIR}/include") -target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") - # Include platform-specific config header generated by manually running the original build system # Please regenerate these files if you update vectorscan. diff --git a/contrib/vectorscan-cmake/common/hs_version.h b/contrib/vectorscan-cmake/common/hs_version.h index 8315b44fb2a..3d266484095 100644 --- a/contrib/vectorscan-cmake/common/hs_version.h +++ b/contrib/vectorscan-cmake/common/hs_version.h @@ -32,8 +32,12 @@ /** * A version string to identify this release of Hyperscan. */ -#define HS_VERSION_STRING "5.4.7 2022-06-20" +#define HS_VERSION_STRING "5.4.11 2024-07-04" #define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (7 << 8) | 0) +#define HS_MAJOR 5 +#define HS_MINOR 4 +#define HS_PATCH 11 + #endif /* HS_VERSION_H_C6428FAF8E3713 */ diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 018fe57bf56..c59ef1b919a 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/packager/binary-builder/build.sh b/docker/packager/binary-builder/build.sh index 032aceb0af3..bd5f2fe8466 100755 --- a/docker/packager/binary-builder/build.sh +++ b/docker/packager/binary-builder/build.sh @@ -111,6 +111,7 @@ fi mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output [ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output [ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output +[ -x ./programs/self-extracting/clickhouse-keeper ] && mv ./programs/self-extracting/clickhouse-keeper /output mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds mv ./programs/*.dict ./programs/*.options ./programs/*_seed_corpus.zip /output ||: # libFuzzer oss-fuzz compatible infrastructure diff --git a/docker/packager/packager b/docker/packager/packager index 2dcbd8d695e..da4af7fc1be 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -276,10 +276,7 @@ def parse_env_variables( if is_release_build(debug_build, package_type, sanitizer, coverage): cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON") result.append("WITH_PERFORMANCE=1") - if is_cross_arm: - cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1") - else: - result.append("BUILD_MUSL_KEEPER=1") + cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1") elif package_type == "fuzzers": cmake_flags.append("-DENABLE_FUZZING=1") cmake_flags.append("-DENABLE_PROTOBUF=1") diff --git a/docker/reqgenerator.py b/docker/reqgenerator.py new file mode 100644 index 00000000000..6c1d89ac0ac --- /dev/null +++ b/docker/reqgenerator.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# To run this script you must install docker and piddeptree python package +# + +import subprocess +import os +import sys + + +def build_docker_deps(image_name, imagedir): + cmd = f"""docker run --entrypoint "/bin/bash" {image_name} -c "pip install pipdeptree 2>/dev/null 1>/dev/null && pipdeptree --freeze --warn silence | sed 's/ \+//g' | sort | uniq" > {imagedir}/requirements.txt""" + subprocess.check_call(cmd, shell=True) + + +def check_docker_file_install_with_pip(filepath): + image_name = None + with open(filepath, "r") as f: + for line in f: + if "docker build" in line: + arr = line.split(" ") + if len(arr) > 4: + image_name = arr[4] + if "pip3 install" in line or "pip install" in line: + return image_name, True + return image_name, False + + +def process_affected_images(images_dir): + for root, _dirs, files in os.walk(images_dir): + for f in files: + if f == "Dockerfile": + docker_file_path = os.path.join(root, f) + print("Checking image on path", docker_file_path) + image_name, has_pip = check_docker_file_install_with_pip( + docker_file_path + ) + if has_pip: + print("Find pip in", image_name) + try: + build_docker_deps(image_name, root) + except Exception as ex: + print(ex) + else: + print("Pip not found in", docker_file_path) + + +process_affected_images(sys.argv[1]) diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index a86406e5129..240df79aeb1 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 25f3273a648..ac64655991a 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index e0be261d5e8..2512268be0f 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -19,10 +19,7 @@ RUN apt-get update \ odbcinst \ psmisc \ python3 \ - python3-lxml \ python3-pip \ - python3-requests \ - python3-termcolor \ unixodbc \ pv \ jq \ @@ -31,7 +28,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt # This symlink is required by gcc to find the lld linker RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld @@ -39,6 +37,10 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld # https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake +# LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot catch up libraries from default install path. +# It's very dirty workaround, better to build compiler and LLVM ourself and use it. Details: https://github.com/llvm/llvm-project/issues/95792 +RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu + ARG CCACHE_VERSION=4.6.1 RUN mkdir /tmp/ccache \ && cd /tmp/ccache \ diff --git a/docker/test/fasttest/requirements.txt b/docker/test/fasttest/requirements.txt new file mode 100644 index 00000000000..993ea22e5ae --- /dev/null +++ b/docker/test/fasttest/requirements.txt @@ -0,0 +1,41 @@ +Jinja2==3.1.3 +MarkupSafe==2.1.5 +PyJWT==2.3.0 +PyYAML==6.0.1 +Pygments==2.11.2 +SecretStorage==3.3.1 +blinker==1.4 +certifi==2020.6.20 +chardet==4.0.0 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +idna==3.3 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==4.8.0 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pandas==1.5.3 +pip==24.1.1 +pipdeptree==2.23.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.9.0.post0 +pytz==2024.1 +requests==2.32.3 +scipy==1.12.0 +setuptools==59.6.0 +six==1.16.0 +termcolor==1.1.0 +urllib3==1.26.5 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 4d5159cfa9e..0d975d64010 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -84,6 +84,8 @@ function start_server echo "ClickHouse server pid '$server_pid' started and responded" } +export -f start_server + function clone_root { [ "$UID" -eq 0 ] && git config --global --add safe.directory "$FASTTEST_SOURCE" @@ -254,6 +256,19 @@ function configure rm -f "$FASTTEST_DATA/config.d/secure_ports.xml" } +function timeout_with_logging() { + local exit_code=0 + + timeout -s TERM --preserve-status "${@}" || exit_code="${?}" + + if [[ "${exit_code}" -eq "124" ]] + then + echo "The command 'timeout ${*}' has been killed by timeout" + fi + + return $exit_code +} + function run_tests { clickhouse-server --version @@ -269,6 +284,11 @@ function run_tests NPROC=1 fi + export CLICKHOUSE_CONFIG_DIR=$FASTTEST_DATA + export CLICKHOUSE_CONFIG="$FASTTEST_DATA/config.xml" + export CLICKHOUSE_USER_FILES="$FASTTEST_DATA/user_files" + export CLICKHOUSE_SCHEMA_FILES="$FASTTEST_DATA/format_schemas" + local test_opts=( --hung-check --fast-tests-only @@ -292,6 +312,8 @@ function run_tests clickhouse stop --pid-path "$FASTTEST_DATA" } +export -f run_tests + case "$stage" in "") ls -la @@ -315,7 +337,7 @@ case "$stage" in configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt" ;& "run_tests") - run_tests + timeout_with_logging 35m bash -c run_tests ||: /process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \ --out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \ --out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv" diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index d3f78ac1d95..e1fb09b8ed5 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -31,7 +31,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install Jinja2 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt COPY * / diff --git a/docker/test/fuzzer/requirements.txt b/docker/test/fuzzer/requirements.txt new file mode 100644 index 00000000000..3dce93e023b --- /dev/null +++ b/docker/test/fuzzer/requirements.txt @@ -0,0 +1,27 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.4 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 270b40e23a6..469251f648c 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -33,7 +33,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install pycurl +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r requirements.txt && rm -rf /root/.cache/pip # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH diff --git a/docker/test/integration/base/requirements.txt b/docker/test/integration/base/requirements.txt new file mode 100644 index 00000000000..d195d8deaf6 --- /dev/null +++ b/docker/test/integration/base/requirements.txt @@ -0,0 +1,26 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +pycurl==7.45.3 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/integration/resolver/Dockerfile b/docker/test/integration/resolver/Dockerfile index 01b9b777614..b35a7262651 100644 --- a/docker/test/integration/resolver/Dockerfile +++ b/docker/test/integration/resolver/Dockerfile @@ -2,4 +2,5 @@ # Helper docker container to run python bottle apps FROM python:3 -RUN python -m pip install bottle +COPY requirements.txt / +RUN python -m pip install --no-cache-dir -r requirements.txt diff --git a/docker/test/integration/resolver/requirements.txt b/docker/test/integration/resolver/requirements.txt new file mode 100644 index 00000000000..fbf85295329 --- /dev/null +++ b/docker/test/integration/resolver/requirements.txt @@ -0,0 +1,6 @@ +bottle==0.12.25 +packaging==24.1 +pip==23.2.1 +pipdeptree==2.23.0 +setuptools==69.0.3 +wheel==0.42.0 diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 23d8a37d822..d250b746e7d 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -26,7 +26,6 @@ RUN apt-get update \ libicu-dev \ bsdutils \ curl \ - python3-pika \ liblua5.1-dev \ luajit \ libssl-dev \ @@ -61,49 +60,8 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \ # kazoo 2.10.0 is broken # https://s3.amazonaws.com/clickhouse-test-reports/59337/524625a1d2f4cc608a3f1059e3df2c30f353a649/integration_tests__asan__analyzer__[5_6].html -RUN python3 -m pip install --no-cache-dir \ - PyMySQL==1.1.0 \ - asyncio==3.4.3 \ - avro==1.10.2 \ - azure-storage-blob==12.19.0 \ - boto3==1.34.24 \ - cassandra-driver==3.29.0 \ - confluent-kafka==2.3.0 \ - delta-spark==2.3.0 \ - dict2xml==1.7.4 \ - dicttoxml==1.7.16 \ - docker==6.1.3 \ - docker-compose==1.29.2 \ - grpcio==1.60.0 \ - grpcio-tools==1.60.0 \ - kafka-python==2.0.2 \ - lz4==4.3.3 \ - minio==7.2.3 \ - nats-py==2.6.0 \ - protobuf==4.25.2 \ - kazoo==2.9.0 \ - psycopg2-binary==2.9.6 \ - pyhdfs==0.3.1 \ - pymongo==3.11.0 \ - pyspark==3.3.2 \ - pytest==7.4.4 \ - pytest-order==1.0.0 \ - pytest-random==0.2 \ - pytest-repeat==0.9.3 \ - pytest-timeout==2.2.0 \ - pytest-xdist==3.5.0 \ - pytest-reportlog==0.4.0 \ - pytz==2023.3.post1 \ - pyyaml==5.3.1 \ - redis==5.0.1 \ - requests-kerberos==0.14.0 \ - tzlocal==2.1 \ - retry==0.9.2 \ - bs4==0.0.2 \ - lxml==5.1.0 \ - urllib3==2.0.7 \ - jwcrypto==1.5.6 -# bs4, lxml are for cloud tests, do not delete +COPY requirements.txt / +RUN python3 -m pip install --no-cache-dir -r requirements.txt # Hudi supports only spark 3.3.*, not 3.4 RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ diff --git a/docker/test/integration/runner/requirements.txt b/docker/test/integration/runner/requirements.txt new file mode 100644 index 00000000000..8a77d8abf77 --- /dev/null +++ b/docker/test/integration/runner/requirements.txt @@ -0,0 +1,113 @@ +PyHDFS==0.3.1 +PyJWT==2.3.0 +PyMySQL==1.1.0 +PyNaCl==1.5.0 +PyYAML==5.3.1 +SecretStorage==3.3.1 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +async-timeout==4.0.3 +asyncio==3.4.3 +attrs==23.2.0 +avro==1.10.2 +azure-core==1.30.1 +azure-storage-blob==12.19.0 +bcrypt==4.1.3 +beautifulsoup4==4.12.3 +blinker==1.4 +boto3==1.34.24 +botocore==1.34.101 +bs4==0.0.2 +cassandra-driver==3.29.0 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +confluent-kafka==2.3.0 +cryptography==3.4.8 +dbus-python==1.2.18 +decorator==5.1.1 +delta-spark==2.3.0 +dict2xml==1.7.4 +dicttoxml==1.7.16 +distro-info==1.1+ubuntu0.2 +distro==1.7.0 +docker-compose==1.29.2 +docker==6.1.3 +dockerpty==0.4.1 +docopt==0.6.2 +exceptiongroup==1.2.1 +execnet==2.1.1 +geomet==0.2.1.post1 +grpcio-tools==1.60.0 +grpcio==1.60.0 +gssapi==1.8.3 +httplib2==0.20.2 +idna==3.7 +importlib-metadata==4.6.4 +iniconfig==2.0.0 +isodate==0.6.1 +jeepney==0.7.1 +jmespath==1.0.1 +jsonschema==3.2.0 +jwcrypto==1.5.6 +kafka-python==2.0.2 +kazoo==2.9.0 +keyring==23.5.0 +krb5==0.5.1 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==5.1.0 +lz4==4.3.3 +minio==7.2.3 +more-itertools==8.10.0 +nats-py==2.6.0 +oauthlib==3.2.0 +packaging==24.0 +paramiko==3.4.0 +pika==1.2.0 +pip==24.1.1 +pipdeptree==2.23.0 +pluggy==1.5.0 +protobuf==4.25.2 +psycopg2-binary==2.9.6 +py4j==0.10.9.5 +py==1.11.0 +pycparser==2.22 +pycryptodome==3.20.0 +pymongo==3.11.0 +pyparsing==2.4.7 +pyrsistent==0.20.0 +pyspark==3.3.2 +pyspnego==0.10.2 +pytest-order==1.0.0 +pytest-random==0.2 +pytest-repeat==0.9.3 +pytest-reportlog==0.4.0 +pytest-timeout==2.2.0 +pytest-xdist==3.5.0 +pytest==7.4.4 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.9.0.post0 +python-dotenv==0.21.1 +pytz==2023.3.post1 +redis==5.0.1 +requests-kerberos==0.14.0 +requests==2.31.0 +retry==0.9.2 +s3transfer==0.10.1 +setuptools==59.6.0 +simplejson==3.19.2 +six==1.16.0 +soupsieve==2.5 +texttable==1.7.0 +tomli==2.0.1 +typing_extensions==4.11.0 +tzlocal==2.1 +unattended-upgrades==0.1 +urllib3==2.0.7 +wadllib==1.3.6 +websocket-client==0.59.0 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/libfuzzer/Dockerfile b/docker/test/libfuzzer/Dockerfile index c9802a0e44e..e6eb2ae336e 100644 --- a/docker/test/libfuzzer/Dockerfile +++ b/docker/test/libfuzzer/Dockerfile @@ -1,3 +1,4 @@ +# docker build -t clickhouse/libfuzzer . ARG FROM_TAG=latest FROM clickhouse/test-base:$FROM_TAG @@ -29,7 +30,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install Jinja2 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt COPY * / diff --git a/docker/test/libfuzzer/requirements.txt b/docker/test/libfuzzer/requirements.txt new file mode 100644 index 00000000000..3dce93e023b --- /dev/null +++ b/docker/test/libfuzzer/requirements.txt @@ -0,0 +1,27 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.4 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/libfuzzer/run_libfuzzer.py b/docker/test/libfuzzer/run_libfuzzer.py index 5ed019490d5..fa67805dfa5 100755 --- a/docker/test/libfuzzer/run_libfuzzer.py +++ b/docker/test/libfuzzer/run_libfuzzer.py @@ -27,19 +27,19 @@ def run_fuzzer(fuzzer: str): parser.read(path) if parser.has_section("asan"): - os.environ[ - "ASAN_OPTIONS" - ] = f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}" + os.environ["ASAN_OPTIONS"] = ( + f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}" + ) if parser.has_section("msan"): - os.environ[ - "MSAN_OPTIONS" - ] = f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}" + os.environ["MSAN_OPTIONS"] = ( + f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}" + ) if parser.has_section("ubsan"): - os.environ[ - "UBSAN_OPTIONS" - ] = f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}" + os.environ["UBSAN_OPTIONS"] = ( + f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}" + ) if parser.has_section("libfuzzer"): custom_libfuzzer_options = " ".join( diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 1835900b316..c68a39f6f70 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -23,7 +23,6 @@ RUN apt-get update \ python3 \ python3-dev \ python3-pip \ - python3-setuptools \ rsync \ tree \ tzdata \ @@ -33,12 +32,14 @@ RUN apt-get update \ cargo \ ripgrep \ zstd \ - && pip3 --no-cache-dir install 'clickhouse-driver==0.2.1' scipy \ && apt-get purge --yes python3-dev g++ \ && apt-get autoremove --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* +COPY requirements.txt / +RUN pip3 --no-cache-dir install -r requirements.txt + COPY run.sh / CMD ["bash", "/run.sh"] diff --git a/docker/test/performance-comparison/requirements.txt b/docker/test/performance-comparison/requirements.txt new file mode 100644 index 00000000000..932527cc022 --- /dev/null +++ b/docker/test/performance-comparison/requirements.txt @@ -0,0 +1,32 @@ +blinker==1.4 +clickhouse-driver==0.2.7 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +Pygments==2.11.2 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +pytz==2023.4 +PyYAML==6.0.1 +scipy==1.12.0 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +tzlocal==2.1 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 1ea1e52e6fa..1425e12cd84 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -18,11 +18,8 @@ RUN apt-get update --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install \ - numpy \ - pyodbc \ - deepdiff \ - sqlglot +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz" diff --git a/docker/test/sqllogic/requirements.txt b/docker/test/sqllogic/requirements.txt new file mode 100644 index 00000000000..abc0a368659 --- /dev/null +++ b/docker/test/sqllogic/requirements.txt @@ -0,0 +1,30 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +deepdiff==7.0.1 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +numpy==1.26.4 +oauthlib==3.2.0 +ordered-set==4.1.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyodbc==5.1.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +sqlglot==23.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/sqltest/Dockerfile b/docker/test/sqltest/Dockerfile index 7f59f65761f..71d915b0c7a 100644 --- a/docker/test/sqltest/Dockerfile +++ b/docker/test/sqltest/Dockerfile @@ -14,9 +14,8 @@ RUN apt-get update --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install \ - pyyaml \ - clickhouse-driver +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt ARG sqltest_repo="https://github.com/elliotchance/sqltest/" diff --git a/docker/test/sqltest/requirements.txt b/docker/test/sqltest/requirements.txt new file mode 100644 index 00000000000..4a0ae3edbac --- /dev/null +++ b/docker/test/sqltest/requirements.txt @@ -0,0 +1,29 @@ +blinker==1.4 +clickhouse-driver==0.2.7 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +pytz==2024.1 +PyYAML==6.0.1 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +tzlocal==5.2 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 355e70f180e..0daf88cad7e 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -6,7 +6,6 @@ FROM clickhouse/stateless-test:$FROM_TAG RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --yes --no-install-recommends \ - python3-requests \ nodejs \ npm \ && apt-get clean \ diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 09a9f51084b..35ffeee5438 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -16,11 +16,17 @@ dpkg -i package_folder/clickhouse-client_*.deb ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +# shellcheck disable=SC1091 +source /utils.lib + # install test configs /usr/share/clickhouse-test/config/install.sh azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --silent --inMemoryPersistence & + ./setup_minio.sh stateful +./mc admin trace clickminio > /test_output/minio.log & +MC_ADMIN_PID=$! config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml @@ -213,6 +219,10 @@ function run_tests() ADDITIONAL_OPTIONS+=('--s3-storage') fi + if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--azure-blob-storage') + fi + if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--db-engine=Ordinary') fi @@ -247,6 +257,8 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] sudo clickhouse stop --pid-path /var/run/clickhouse-server2 ||: fi +# Kill minio admin client to stop collecting logs +kill $MC_ADMIN_PID rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst ||: @@ -268,3 +280,5 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: mv /var/log/clickhouse-server/stderr2.log /test_output/ ||: fi + +collect_core_dumps diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index c3d80a7334b..a0e5513a3a2 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -25,10 +25,7 @@ RUN apt-get update -y \ openssl \ postgresql-client \ python3 \ - python3-lxml \ python3-pip \ - python3-requests \ - python3-termcolor \ qemu-user-static \ sqlite3 \ sudo \ @@ -51,7 +48,8 @@ RUN curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PR && unzip protoc-${PROTOC_VERSION}-linux-x86_64.zip -d /usr/local \ && rm protoc-${PROTOC_VERSION}-linux-x86_64.zip -RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 pyarrow==15.0.0 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && cd /tmp/clickhouse-odbc-tmp \ @@ -88,6 +86,7 @@ RUN curl -L --no-verbose -O 'https://archive.apache.org/dist/hadoop/common/hadoo ENV MINIO_ROOT_USER="clickhouse" ENV MINIO_ROOT_PASSWORD="clickhouse" ENV EXPORT_S3_STORAGE_POLICIES=1 +ENV CLICKHOUSE_GRPC_CLIENT="/usr/share/clickhouse-utils/grpc-client/clickhouse-grpc-client.py" RUN npm install -g azurite@3.30.0 \ && npm install -g tslib && npm install -g node diff --git a/docker/test/stateless/requirements.txt b/docker/test/stateless/requirements.txt new file mode 100644 index 00000000000..74860d5fec3 --- /dev/null +++ b/docker/test/stateless/requirements.txt @@ -0,0 +1,53 @@ +awscli==1.22.34 +blinker==1.4 +botocore==1.23.34 +certifi==2020.6.20 +chardet==4.0.0 +colorama==0.4.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +docutils==0.17.1 +grpcio==1.47.0 +gyp==0.1 +httplib2==0.20.2 +idna==3.3 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.3 +jmespath==0.10.0 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==4.8.0 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pandas==1.5.3 +pip==24.1.1 +pipdeptree==2.23.0 +protobuf==4.25.3 +pyarrow==15.0.0 +pyasn1==0.4.8 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.8.1 +pytz==2024.1 +PyYAML==6.0.1 +requests==2.32.3 +roman==3.3 +rsa==4.8 +s3transfer==0.5.0 +scipy==1.12.0 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +termcolor==1.1.0 +urllib3==1.26.5 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 3ce489b9e0e..c0bfc12bc75 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -6,19 +6,30 @@ source /setup_export_logs.sh # fail on errors, verbose and export all env variables set -e -x -a +MAX_RUN_TIME=${MAX_RUN_TIME:-7200} +MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 7200 : MAX_RUN_TIME)) + +USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0} +USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0} + +RUN_SEQUENTIAL_TESTS_IN_PARALLEL=1 + +if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] || [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0 +fi + # Choose random timezone for this test run. # # NOTE: that clickhouse-test will randomize session_timezone by itself as well # (it will choose between default server timezone and something specific). TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)" -echo "Choosen random timezone $TZ" +echo "Chosen random timezone $TZ" ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone dpkg -i package_folder/clickhouse-common-static_*.deb dpkg -i package_folder/clickhouse-common-static-dbg_*.deb -# Accept failure in the next two commands until 24.4 is released (for compatibility and Bugfix validation run) -dpkg -i package_folder/clickhouse-odbc-bridge_*.deb || true -dpkg -i package_folder/clickhouse-library-bridge_*.deb || true +dpkg -i package_folder/clickhouse-odbc-bridge_*.deb +dpkg -i package_folder/clickhouse-library-bridge_*.deb dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb @@ -43,6 +54,9 @@ source /utils.lib /usr/share/clickhouse-test/config/install.sh ./setup_minio.sh stateless +./mc admin trace clickminio > /test_output/minio.log & +MC_ADMIN_PID=$! + ./setup_hdfs_minicluster.sh config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml @@ -55,12 +69,6 @@ if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; th rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml - #todo: remove these after 24.3 released. - sudo sed -i "s|azure<|azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml - - #todo: remove these after 24.3 released. - sudo sed -i "s|local<|local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml - function remove_keeper_config() { sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml @@ -93,10 +101,57 @@ if [ "$NUM_TRIES" -gt "1" ]; then mkdir -p /var/run/clickhouse-server fi +# Run a CH instance to execute sequential tests on it in parallel with all other tests. +if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + mkdir -p /var/run/clickhouse-server3 /etc/clickhouse-server3 /var/lib/clickhouse3 + cp -r -L /etc/clickhouse-server/* /etc/clickhouse-server3/ + + sudo chown clickhouse:clickhouse /var/run/clickhouse-server3 /var/lib/clickhouse3 /etc/clickhouse-server3/ + sudo chown -R clickhouse:clickhouse /etc/clickhouse-server3/* + + function replace(){ + sudo find /etc/clickhouse-server3/ -type f -name '*.xml' -exec sed -i "$1" {} \; + } + + replace "s|9000|19000|g" + replace "s|9440|19440|g" + replace "s|9988|19988|g" + replace "s|9234|19234|g" + replace "s|9181|19181|g" + replace "s|8443|18443|g" + replace "s|9000|19000|g" + replace "s|9181|19181|g" + replace "s|9440|19440|g" + replace "s|9010|19010|g" + replace "s|9004|19004|g" + replace "s|9005|19005|g" + replace "s|9009|19009|g" + replace "s|8123|18123|g" + replace "s|/var/lib/clickhouse/|/var/lib/clickhouse3/|g" + replace "s|/etc/clickhouse-server/|/etc/clickhouse-server3/|g" + # distributed cache + replace "s|10001|10003|g" + replace "s|10002|10004|g" + + sudo -E -u clickhouse /usr/bin/clickhouse server --daemon --config /etc/clickhouse-server3/config.xml \ + --pid-file /var/run/clickhouse-server3/clickhouse-server.pid \ + -- --path /var/lib/clickhouse3/ --logger.stderr /var/log/clickhouse-server/stderr3.log \ + --logger.log /var/log/clickhouse-server/clickhouse-server3.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server3.err.log \ + --tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \ + --prometheus.port 19988 --keeper_server.raft_configuration.server.port 19234 --keeper_server.tcp_port 19181 \ + --mysql_port 19004 --postgresql_port 19005 + + for _ in {1..100} + do + clickhouse-client --port 19000 --query "SELECT 1" && break + sleep 1 + done +fi + # simplest way to forward env variables to server sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid -if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then +if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then sudo sed -i "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_1/|" /etc/clickhouse-server1/config.d/filesystem_caches_path.xml sudo sed -i "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_2/|" /etc/clickhouse-server2/config.d/filesystem_caches_path.xml @@ -133,7 +188,7 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) fi -if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then +if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \ | sed "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_1/|" \ > /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp @@ -207,21 +262,21 @@ function run_tests() if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then # to disable the same tests - ADDITIONAL_OPTIONS+=('--s3-storage') + ADDITIONAL_OPTIONS+=('--azure-blob-storage') # azurite is slow, but with these two settings it can be super slow ADDITIONAL_OPTIONS+=('--no-random-settings') ADDITIONAL_OPTIONS+=('--no-random-merge-tree-settings') fi - if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--shared-catalog') fi - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--replicated-database') # Too many tests fail for DatabaseReplicated in parallel. ADDITIONAL_OPTIONS+=('--jobs') - ADDITIONAL_OPTIONS+=('2') + ADDITIONAL_OPTIONS+=('3') elif [[ 1 == $(clickhouse-client --query "SELECT value LIKE '%SANITIZE_COVERAGE%' FROM system.build_options WHERE name = 'CXX_FLAGS'") ]]; then # Coverage on a per-test basis could only be collected sequentially. # Do not set the --jobs parameter. @@ -229,7 +284,11 @@ function run_tests() else # All other configurations are OK. ADDITIONAL_OPTIONS+=('--jobs') - ADDITIONAL_OPTIONS+=('8') + ADDITIONAL_OPTIONS+=('5') + fi + + if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--run-sequential-tests-in-parallel') fi if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then @@ -253,7 +312,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + timeout -k 60m -s TERM --preserve-status 140m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt @@ -262,14 +321,17 @@ function run_tests() export -f run_tests + +# This should be enough to setup job and collect artifacts +TIMEOUT=$((MAX_RUN_TIME - 700)) if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. - timeout_with_logging "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ + timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ | sed 's/All tests have finished//' | sed 's/No tests were run//' ||: fi -timeout_with_logging "$MAX_RUN_TIME" bash -c run_tests ||: +timeout_with_logging "$TIMEOUT" bash -c run_tests ||: echo "Files in current directory" ls -la ./ @@ -290,7 +352,7 @@ do err=$(clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.gz' format TSVWithNamesAndTypes") echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then err=$( { clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 @@ -299,7 +361,7 @@ do [[ "0" != "${#err}" ]] && failed_to_save_logs=1 fi - if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then err=$( { clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 @@ -310,19 +372,33 @@ done # Why do we read data with clickhouse-local? # Because it's the simplest way to read it when server has crashed. sudo clickhouse stop ||: -if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + +if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + sudo clickhouse stop --pid-path /var/run/clickhouse-server3 ||: +fi + +if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||: sudo clickhouse stop --pid-path /var/run/clickhouse-server2 ||: fi -if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then +if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||: fi +# Kill minio admin client to stop collecting logs +kill $MC_ADMIN_PID + rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & +if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + rg -Fa "" /var/log/clickhouse-server3/clickhouse-server.log ||: + rg -A50 -Fa "============" /var/log/clickhouse-server3/stderr.log ||: + zstd --threads=0 < /var/log/clickhouse-server3/clickhouse-server.log > /test_output/clickhouse-server3.log.zst & +fi + data_path_config="--path=/var/lib/clickhouse/" if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then # We need s3 storage configuration (but it's more likely that clickhouse-local will fail for some reason) @@ -342,12 +418,17 @@ if [ $failed_to_save_logs -ne 0 ]; then for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log do clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + + if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + clickhouse-local --path /var/lib/clickhouse3/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.3.tsv.zst ||: + fi + + if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||: fi - if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: fi done @@ -383,7 +464,14 @@ rm -rf /var/lib/clickhouse/data/system/*/ tar -chf /test_output/store.tar /var/lib/clickhouse/store ||: tar -chf /test_output/metadata.tar /var/lib/clickhouse/metadata/*.sql ||: -if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then +if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + rm -rf /var/lib/clickhouse3/data/system/*/ + tar -chf /test_output/store.tar /var/lib/clickhouse3/store ||: + tar -chf /test_output/metadata.tar /var/lib/clickhouse3/metadata/*.sql ||: +fi + + +if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then rg -Fa "" /var/log/clickhouse-server/clickhouse-server1.log ||: rg -Fa "" /var/log/clickhouse-server/clickhouse-server2.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.zst ||: @@ -394,9 +482,11 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||: fi -if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then +if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then rg -Fa "" /var/log/clickhouse-server/clickhouse-server1.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.zst ||: mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||: fi + +collect_core_dumps diff --git a/docker/test/stateless/setup_hdfs_minicluster.sh b/docker/test/stateless/setup_hdfs_minicluster.sh index 6671e73562a..15a54f59096 100755 --- a/docker/test/stateless/setup_hdfs_minicluster.sh +++ b/docker/test/stateless/setup_hdfs_minicluster.sh @@ -10,7 +10,7 @@ cd hadoop-3.3.1 export JAVA_HOME=/usr mkdir -p target/test/data chown clickhouse ./target/test/data -sudo -E -u clickhouse bin/mapred minicluster -format -nomr -nnport 12222 >> /test_output/garbage.log 2>&1 & +sudo -E -u clickhouse bin/mapred minicluster -format -nomr -nnport 12222 >> /test_output/hdfs_minicluster.log 2>&1 & while ! nc -z localhost 12222; do sleep 1 diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index c069ccbdd8d..682da1df837 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -1,8 +1,5 @@ #!/bin/bash -# core.COMM.PID-TID -sysctl kernel.core_pattern='core.%e.%p-%P' - OK="\tOK\t\\N\t" FAIL="\tFAIL\t\\N\t" @@ -315,12 +312,4 @@ function collect_query_and_trace_logs() done } -function collect_core_dumps() -{ - find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do - zstd --threads=0 "$core" - mv "$core.zst" /test_output/ - done -} - # vi: ft=bash diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index 9b6ab535a90..c3bb8ae9ea4 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -1,5 +1,10 @@ #!/bin/bash +# core.COMM.PID-TID +sysctl kernel.core_pattern='core.%e.%p-%P' +# ASAN doesn't work with suid_dumpable=2 +sysctl fs.suid_dumpable=1 + function run_with_retry() { if [[ $- =~ e ]]; then @@ -38,7 +43,7 @@ function fn_exists() { function timeout_with_logging() { local exit_code=0 - timeout "${@}" || exit_code="${?}" + timeout -s TERM --preserve-status "${@}" || exit_code="${?}" if [[ "${exit_code}" -eq "124" ]] then @@ -48,4 +53,12 @@ function timeout_with_logging() { return $exit_code } +function collect_core_dumps() +{ + find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do + zstd --threads=0 "$core" + mv "$core.zst" /test_output/ + done +} + # vi: ft=bash diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 6d121ba4142..86467394513 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -21,6 +21,9 @@ source /attach_gdb.lib # shellcheck source=../stateless/stress_tests.lib source /stress_tests.lib +# shellcheck disable=SC1091 +source /utils.lib + install_packages package_folder # Thread Fuzzer allows to check more permutations of possible thread scheduling @@ -110,6 +113,15 @@ start_server clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "SHOW TABLES FROM test" +if [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + TEMP_POLICY="s3_cache" +elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + TEMP_POLICY="azure_cache" +else + TEMP_POLICY="default" +fi + + clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, @@ -135,7 +147,7 @@ clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnabl URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) - ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, @@ -161,7 +173,7 @@ clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable U URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) - ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, @@ -195,7 +207,7 @@ clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDat Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) - SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" @@ -211,19 +223,29 @@ clickhouse-client --query "SYSTEM STOP THREAD FUZZER" stop_server # Let's enable S3 storage by default -export USE_S3_STORAGE_FOR_MERGE_TREE=1 export RANDOMIZE_OBJECT_KEY_TYPE=1 export ZOOKEEPER_FAULT_INJECTION=1 export THREAD_POOL_FAULT_INJECTION=1 configure -# But we still need default disk because some tables loaded only into it -sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ - | sed "s|
s3
|
s3
default|" \ - > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp -mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +if [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + # But we still need default disk because some tables loaded only into it + sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ + | sed "s|
s3
|
s3
default|" \ + > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp + mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + # But we still need default disk because some tables loaded only into it + sudo cat /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml \ + | sed "s|
azure
|
azure
default|" \ + > /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml.tmp + mv /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml + sudo chown clickhouse /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml + sudo chgrp clickhouse /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml +fi + sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \ | sed "s|trace|test|" \ diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 7cd712b73f6..cdc1d1fa095 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -23,22 +23,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* # python-magic is the same version as in Ubuntu 22.04 -RUN pip3 install \ - PyGithub \ - black==23.12.0 \ - boto3 \ - codespell==2.2.1 \ - mypy==1.8.0 \ - pylint==3.1.0 \ - python-magic==0.4.24 \ - flake8==4.0.1 \ - requests \ - thefuzz \ - tqdm==4.66.4 \ - types-requests \ - unidiff \ - jwt \ - && rm -rf /root/.cache/pip +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r requirements.txt RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 ENV LC_ALL en_US.UTF-8 diff --git a/docker/test/style/requirements.txt b/docker/test/style/requirements.txt new file mode 100644 index 00000000000..ed73d0d3636 --- /dev/null +++ b/docker/test/style/requirements.txt @@ -0,0 +1,58 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +astroid==3.1.0 +async-timeout==4.0.3 +attrs==23.2.0 +black==24.4.2 +boto3==1.34.131 +botocore==1.34.131 +certifi==2024.6.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +codespell==2.2.1 +cryptography==42.0.8 +Deprecated==1.2.14 +dill==0.3.8 +flake8==4.0.1 +frozenlist==1.4.1 +idna==3.7 +isort==5.13.2 +jmespath==1.0.1 +jwt==1.3.1 +mccabe==0.6.1 +multidict==6.0.5 +mypy==1.8.0 +mypy-extensions==1.0.0 +packaging==24.1 +pathspec==0.9.0 +pip==24.1.1 +pipdeptree==2.23.0 +platformdirs==4.2.2 +pycodestyle==2.8.0 +pycparser==2.22 +pyflakes==2.4.0 +PyGithub==2.3.0 +PyJWT==2.8.0 +pylint==3.1.0 +PyNaCl==1.5.0 +python-dateutil==2.9.0.post0 +python-magic==0.4.24 +PyYAML==6.0.1 +rapidfuzz==3.9.3 +requests==2.32.3 +s3transfer==0.10.1 +setuptools==59.6.0 +six==1.16.0 +thefuzz==0.22.1 +tomli==2.0.1 +tomlkit==0.12.5 +tqdm==4.66.4 +types-requests==2.32.0.20240622 +typing_extensions==4.12.2 +unidiff==0.7.5 +urllib3==2.2.2 +wheel==0.37.1 +wrapt==1.16.0 +yamllint==1.26.3 +yarl==1.9.4 diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index fd4cc9f4bf7..4442c9d7d9e 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -11,6 +11,7 @@ TIMEOUT_SIGN = "[ Timeout! " UNKNOWN_SIGN = "[ UNKNOWN " SKIPPED_SIGN = "[ SKIPPED " HUNG_SIGN = "Found hung queries in processlist" +SERVER_DIED_SIGN = "Server died, terminating all processes" DATABASE_SIGN = "Database: " SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"] @@ -25,6 +26,7 @@ def process_test_log(log_path, broken_tests): failed = 0 success = 0 hung = False + server_died = False retries = False success_finish = False test_results = [] @@ -41,6 +43,8 @@ def process_test_log(log_path, broken_tests): if HUNG_SIGN in line: hung = True break + if SERVER_DIED_SIGN in line: + server_died = True if RETRIES_SIGN in line: retries = True if any( @@ -123,6 +127,7 @@ def process_test_log(log_path, broken_tests): failed, success, hung, + server_died, success_finish, retries, test_results, @@ -150,6 +155,7 @@ def process_result(result_path, broken_tests): failed, success, hung, + server_died, success_finish, retries, test_results, @@ -165,6 +171,10 @@ def process_result(result_path, broken_tests): description = "Some queries hung, " state = "failure" test_results.append(("Some queries hung", "FAIL", "0", "")) + elif server_died: + description = "Server died, " + state = "failure" + test_results.append(("Server died", "FAIL", "0", "")) elif not success_finish: description = "Tests are not finished, " state = "failure" @@ -218,5 +228,20 @@ if __name__ == "__main__": state, description, test_results = process_result(args.in_results_dir, broken_tests) logging.info("Result parsed") status = (state, description) + + def test_result_comparator(item): + # sort by status then by check name + order = { + "FAIL": 0, + "Timeout": 1, + "NOT_FAILED": 2, + "BROKEN": 3, + "OK": 4, + "SKIPPED": 5, + } + return order.get(item[1], 10), str(item[0]), item[1] + + test_results.sort(key=test_result_comparator) + write_results(args.out_results_file, args.out_status_file, test_results, status) logging.info("Result written") diff --git a/docs/changelogs/v24.6.2.17-stable.md b/docs/changelogs/v24.6.2.17-stable.md new file mode 100644 index 00000000000..820937f6291 --- /dev/null +++ b/docs/changelogs/v24.6.2.17-stable.md @@ -0,0 +1,26 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.6.2.17-stable (5710a8b5c0c) FIXME as compared to v24.6.1.4423-stable (dcced7c8478) + +#### New Feature +* Backported in [#66002](https://github.com/ClickHouse/ClickHouse/issues/66002): Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Improvement +* Backported in [#65898](https://github.com/ClickHouse/ClickHouse/issues/65898): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#65935](https://github.com/ClickHouse/ClickHouse/issues/65935): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65907](https://github.com/ClickHouse/ClickHouse/issues/65907): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#65962](https://github.com/ClickHouse/ClickHouse/issues/65962): Add missing workload identity changes. [#65848](https://github.com/ClickHouse/ClickHouse/pull/65848) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Backported in [#66033](https://github.com/ClickHouse/ClickHouse/issues/66033): Follow up to [#65046](https://github.com/ClickHouse/ClickHouse/issues/65046). [#65928](https://github.com/ClickHouse/ClickHouse/pull/65928) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#66076](https://github.com/ClickHouse/ClickHouse/issues/66076): Fix support of non-const scale arguments in rounding functions. [#65983](https://github.com/ClickHouse/ClickHouse/pull/65983) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Backported in [#66017](https://github.com/ClickHouse/ClickHouse/issues/66017): Fix race in s3queue. [#65986](https://github.com/ClickHouse/ClickHouse/pull/65986) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 0a1fe58b16f..4b11fb71230 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -226,15 +226,59 @@ Other IDEs you can use are [Sublime Text](https://www.sublimetext.com/), [Visual ## Writing Code {#writing-code} -The description of ClickHouse architecture can be found here: https://clickhouse.com/docs/en/development/architecture/ +Below you can find some quick links which may be useful when writing code for ClickHouse: -The Code Style Guide: https://clickhouse.com/docs/en/development/style/ +- [ClickHouse architecture description](https://clickhouse.com/docs/en/development/architecture/). +- [The code style guide](https://clickhouse.com/docs/en/development/style/). +- [Adding third-party libraries](https://clickhouse.com/docs/en/development/contrib/#adding-third-party-libraries) +- [Writing tests](https://clickhouse.com/docs/en/development/tests/) +- [List of open issues](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3Ahacktoberfest) -Adding third-party libraries: https://clickhouse.com/docs/en/development/contrib/#adding-third-party-libraries +## Writing Documentation {#writing-documentation} -Writing tests: https://clickhouse.com/docs/en/development/tests/ +As part of every pull request which adds a new feature, it is necessary to write documentation for it. If you'd like to preview your documentation changes the instructions for how to build the documentation page locally are available in the README.md file [here](https://github.com/ClickHouse/clickhouse-docs). When adding a new function to ClickHouse you can use the template below as a guide: -List of tasks: https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3Ahacktoberfest +```markdown +# newFunctionName + +A short description of the function goes here. It should describe briefly what it does and a typical usage case. + +**Syntax** + +\```sql +newFunctionName(arg1, arg2[, arg3]) +\``` + +**Arguments** + +- `arg1` — Description of the argument. [DataType](../data-types/float.md) +- `arg2` — Description of the argument. [DataType](../data-types/float.md) +- `arg3` — Description of optional argument (optional). [DataType](../data-types/float.md) + +**Implementation Details** + +A description of implementation details if relevant. + +**Returned value** + +- Returns {insert what the function returns here}. [DataType](../data-types/float.md) + +**Example** + +Query: + +\```sql +SELECT 'write your example query here'; +\``` + +Response: + +\```response +┌───────────────────────────────────┐ +│ the result of the query │ +└───────────────────────────────────┘ +\``` +``` ## Test Data {#test-data} diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index 11181703645..06325fa15fb 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -75,7 +75,7 @@ SETTINGS Possible values: - unordered — With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKeeper. -- ordered — With ordered mode, only the max name of the successfully consumed file, and the names of files that will be retried after unsuccessful loading attempt are being stored in ZooKeeper. +- ordered — With ordered mode, the files are processed in lexicographic order. It means that if file named 'BBB' was processed at some point and later on a file named 'AA' is added to the bucket, it will be ignored. Only the max name (in lexicographic sense) of the successfully consumed file, and the names of files that will be retried after unsuccessful loading attempt are being stored in ZooKeeper. Default value: `ordered` in versions before 24.6. Starting with 24.6 there is no default value, the setting becomes required to be specified manually. For tables created on earlier versions the default value will remain `Ordered` for compatibility. diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index f0c4e1b0e34..3826e4e9c94 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -993,11 +993,11 @@ They can be used for prewhere optimization only if we enable `set allow_statisti - `TDigest` - Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch. + [TDigest](https://github.com/tdunning/t-digest) sketches which allow to compute approximate percentiles (e.g. the 90th percentile) for numeric columns. - `Uniq` - - Estimate the number of distinct values of a column by HyperLogLog. + + [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains. ## Column-level Settings {#column-level-settings} diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 98e73dec451..6209ef3c8ee 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -15,7 +15,7 @@ You have four options for getting up and running with ClickHouse: - **[ClickHouse Cloud](https://clickhouse.com/cloud/):** The official ClickHouse as a service, - built by, maintained and supported by the creators of ClickHouse - **[Quick Install](#quick-install):** an easy-to-download binary for testing and developing with ClickHouse -- **[Production Deployments](#available-installation-options):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, ARM, or PowerPC64LE CPU architecture +- **[Production Deployments](#available-installation-options):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, modern ARM (ARMv8.2-A up), or PowerPC64LE CPU architecture - **[Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/):** use the official Docker image in Docker Hub ## ClickHouse Cloud diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index a81a17e65d6..b91b794d2d6 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1535,6 +1535,10 @@ the columns from input data will be mapped to the columns from the table by thei Otherwise, the first row will be skipped. If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. +If setting [output_format_binary_encode_types_in_binary_format](/docs/en/operations/settings/settings-formats.md/#output_format_binary_encode_types_in_binary_format) is set to 1, +the types in header will be written using [binary encoding](/docs/en/sql-reference/data-types/data-types-binary-encoding.md) instead of strings with type names in RowBinaryWithNamesAndTypes output format. +If setting [input_format_binary_encode_types_in_binary_format](/docs/en/operations/settings/settings-formats.md/#input_format_binary_encode_types_in_binary_format) is set to 1, +the types in header will be read using [binary encoding](/docs/en/sql-reference/data-types/data-types-binary-encoding.md) instead of strings with type names in RowBinaryWithNamesAndTypes input format. ::: ## RowBinaryWithDefaults {#rowbinarywithdefaults} diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 46c24ad8491..fc861e25e9f 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -84,6 +84,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des - [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level - `password` for the file on disk - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` + - `use_same_s3_credentials_for_base_backup`: whether base backup to S3 should inherit credentials from the query. Only works with `S3`. - `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables - `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family. - `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD` diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md index 70f64d08ba3..fe60ceedc0b 100644 --- a/docs/en/operations/opentelemetry.md +++ b/docs/en/operations/opentelemetry.md @@ -2,15 +2,11 @@ slug: /en/operations/opentelemetry sidebar_position: 62 sidebar_label: Tracing ClickHouse with OpenTelemetry -title: "[experimental] Tracing ClickHouse with OpenTelemetry" +title: "Tracing ClickHouse with OpenTelemetry" --- [OpenTelemetry](https://opentelemetry.io/) is an open standard for collecting traces and metrics from the distributed application. ClickHouse has some support for OpenTelemetry. -:::note -This is an experimental feature that will change in backwards-incompatible ways in future releases. -::: - ## Supplying Trace Context to ClickHouse ClickHouse accepts trace context HTTP headers, as described by the [W3C recommendation](https://www.w3.org/TR/trace-context/). It also accepts trace context over a native protocol that is used for communication between ClickHouse servers or between the client and server. For manual testing, trace context headers conforming to the Trace Context recommendation can be supplied to `clickhouse-client` using `--opentelemetry-traceparent` and `--opentelemetry-tracestate` flags. diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 9879ee35612..7278b91f90d 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -974,6 +974,13 @@ Default value: false - [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting +## use_compact_variant_discriminators_serialization {#use_compact_variant_discriminators_serialization} + +Enables compact mode for binary serialization of discriminators in Variant data type. +This mode allows to use significantly less memory for storing discriminators in parts when there is mostly one variant or a lot of NULL values. + +Default value: true + ## merge_workload Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for background merges of this table. If not specified (empty string), then server setting `merge_workload` is used instead. @@ -1023,7 +1030,7 @@ A table with no primary key represents the extreme case of a single equivalence The fewer and the larger the equivalence classes are, the higher the degree of freedom when re-shuffling rows. -The heuristics applied to find the best row order within each equivalence class is suggested by D. Lemir, O. Kaser in [Reordering columns for smaller indexes](https://doi.org/10.1016/j.ins.2011.02.002) and based on sorting the rows within each equivalence class by ascending cardinality of the non-primary key columns. +The heuristics applied to find the best row order within each equivalence class is suggested by D. Lemire, O. Kaser in [Reordering columns for smaller indexes](https://doi.org/10.1016/j.ins.2011.02.002) and based on sorting the rows within each equivalence class by ascending cardinality of the non-primary key columns. It performs three steps: 1. Find all equivalence classes based on the row values in primary key columns. 2. For each equivalence class, calculate (usually estimate) the cardinalities of the non-primary-key columns. diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 530023df5b7..f8b40cd81ac 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1951,6 +1951,18 @@ The maximum allowed size for String in RowBinary format. It prevents allocating Default value: `1GiB`. +### output_format_binary_encode_types_in_binary_format {#output_format_binary_encode_types_in_binary_format} + +Write data types in [binary format](../../sql-reference/data-types/data-types-binary-encoding.md) instead of type names in RowBinaryWithNamesAndTypes output format. + +Disabled by default. + +### input_format_binary_decode_types_in_binary_format {#input_format_binary_decode_types_in_binary_format} + +Read data types in [binary format](../../sql-reference/data-types/data-types-binary-encoding.md) instead of type names in RowBinaryWithNamesAndTypes input format. + +Disabled by default. + ## Native format settings {#native-format-settings} ### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion} @@ -1958,3 +1970,15 @@ Default value: `1GiB`. Allow types conversion in Native input format between columns from input data and requested columns. Enabled by default. + +### output_format_native_encode_types_in_binary_format {#output_format_native_encode_types_in_binary_format} + +Write data types in [binary format](../../sql-reference/data-types/data-types-binary-encoding.md) instead of type names in Native output format. + +Disabled by default. + +### input_format_native_decode_types_in_binary_format {#input_format_native_decode_types_in_binary_format} + +Read data types in [binary format](../../sql-reference/data-types/data-types-binary-encoding.md) instead of type names in Native input format. + +Disabled by default. \ No newline at end of file diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 3d6d776f4da..c3f697c3bdc 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1170,6 +1170,10 @@ Data in the VALUES clause of INSERT queries is processed by a separate stream pa Default value: 262144 (= 256 KiB). +:::note +`max_query_size` cannot be set within an SQL query (e.g., `SELECT now() SETTINGS max_query_size=10000`) because ClickHouse needs to allocate a buffer to parse the query, and this buffer size is determined by the `max_query_size` setting, which must be configured before the query is executed. +::: + ## max_parser_depth {#max_parser_depth} Limits maximum recursion depth in the recursive descent parser. Allows controlling the stack size. @@ -1354,12 +1358,25 @@ Connection pool size for PostgreSQL table engine and database engine. Default value: 16 +## postgresql_connection_attempt_timeout {#postgresql-connection-attempt-timeout} + +Connection timeout in seconds of a single attempt to connect PostgreSQL end-point. +The value is passed as a `connect_timeout` parameter of the connection URL. + +Default value: `2`. + ## postgresql_connection_pool_wait_timeout {#postgresql-connection-pool-wait-timeout} Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool. Default value: 5000 +## postgresql_connection_pool_retries {#postgresql-connection-pool-retries} + +The maximum number of retries to establish a connection with the PostgreSQL end-point. + +Default value: `2`. + ## postgresql_connection_pool_auto_close_connection {#postgresql-connection-pool-auto-close-connection} Close connection before returning connection to the pool. @@ -2536,7 +2553,7 @@ Possible values: - 0 — Optimization disabled. - 1 — Optimization enabled. -Default value: `0`. +Default value: `1`. ## optimize_trivial_count_query {#optimize-trivial-count-query} diff --git a/docs/en/operations/startup-scripts.md b/docs/en/operations/startup-scripts.md new file mode 100644 index 00000000000..91aa4772bcf --- /dev/null +++ b/docs/en/operations/startup-scripts.md @@ -0,0 +1,30 @@ +--- +slug: /en/operations/startup-scripts +sidebar_label: Startup Scripts +--- + +# Startup Scripts + +ClickHouse can run arbitrary SQL queries from the server configuration during startup. This can be useful for migrations or automatic schema creation. + +```xml + + + + CREATE ROLE OR REPLACE test_role + + + CREATE TABLE TestTable (id UInt64) ENGINE=TinyLog + SELECT 1; + + + +``` + +ClickHouse executes all queries from the `startup_scripts` sequentially in the specified order. If any of the queries fail, the execution of the following queries won't be interrupted. + +You can specify a conditional query in the config. In that case, the corresponding query executes only when the condition query returns the value `1` or `true`. + +:::note +If the condition query returns any other value than `1` or `true`, the result will be interpreted as `false`, and the corresponding won't be executed. +::: diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 83ce817b7db..f253b164e2a 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -357,7 +357,7 @@ Number of currently running inserts to Kafka Number of alive connections -### KeeperOutstandingRequets +### KeeperOutstandingRequests Number of outstanding requests diff --git a/docs/en/operations/utilities/clickhouse-disks.md b/docs/en/operations/utilities/clickhouse-disks.md index 76db9e41836..e22bc06b641 100644 --- a/docs/en/operations/utilities/clickhouse-disks.md +++ b/docs/en/operations/utilities/clickhouse-disks.md @@ -4,35 +4,56 @@ sidebar_position: 59 sidebar_label: clickhouse-disks --- -# clickhouse-disks +# Clickhouse-disks -A utility providing filesystem-like operations for ClickHouse disks. +A utility providing filesystem-like operations for ClickHouse disks. It can work in both interactive and not interactive modes. -Program-wide options: +## Program-wide options * `--config-file, -C` -- path to ClickHouse config, defaults to `/etc/clickhouse-server/config.xml`. * `--save-logs` -- Log progress of invoked commands to `/var/log/clickhouse-server/clickhouse-disks.log`. * `--log-level` -- What [type](../server-configuration-parameters/settings#server_configuration_parameters-logger) of events to log, defaults to `none`. * `--disk` -- what disk to use for `mkdir, move, read, write, remove` commands. Defaults to `default`. +* `--query, -q` -- single query that can be executed without launching interactive mode +* `--help, -h` -- print all the options and commands with description + +## Default Disks +After the launch two disks are initialized. The first one is a disk `local` that is supposed to imitate local file system from which clickhouse-disks utility was launched. The second one is a disk `default` that is mounted to the local filesystem in the directory that can be found in config as a parameter `clickhouse/path` (default value is `/var/lib/clickhouse`). + +## Clickhouse-disks state +For each disk that was added the utility stores current directory (as in a usual filesystem). User can change current directory and switch between disks. + +State is reflected in a prompt "`disk_name`:`path_name`" ## Commands -* `copy [--disk-from d1] [--disk-to d2] `. - Recursively copy data from `FROM_PATH` at disk `d1` (defaults to `disk` value if not provided) - to `TO_PATH` at disk `d2` (defaults to `disk` value if not provided). -* `move `. - Move file or directory from `FROM_PATH` to `TO_PATH`. -* `remove `. - Remove `PATH` recursively. -* `link `. - Create a hardlink from `FROM_PATH` to `TO_PATH`. -* `list [--recursive] ...` - List files at `PATH`s. Non-recursive by default. -* `list-disks`. +In these documentation file all mandatory positional arguments are referred as ``, named arguments are referred as `[--parameter value]`. All positional parameters could be mentioned as a named parameter with a corresponding name. + +* `cd (change-dir, change_dir) [--disk disk] ` + Change directory to path `path` on disk `disk` (default value is a current disk). No disk switching happens. +* `copy (cp) [--disk-from disk_1] [--disk-to disk_2] `. + Recursively copy data from `path-from` at disk `disk_1` (default value is a current disk (parameter `disk` in a non-interactive mode)) + to `path-to` at disk `disk_2` (default value is a current disk (parameter `disk` in a non-interactive mode)). +* `current_disk_with_path (current, current_disk, current_path)` + Print current state in format: + `Disk: "current_disk" Path: "current path on current disk"` +* `help []` + Print help message about command `command`. If `command` is not specified print information about all commands. +* `move (mv) `. + Move file or directory from `path-from` to `path-to` within current disk. +* `remove (rm, delete) `. + Remove `path` recursively on a current disk. +* `link (ln) `. + Create a hardlink from `path-from` to `path-to` on a current disk. +* `list (ls) [--recursive] ` + List files at `path`s on a current disk. Non-recursive by default. +* `list-disks (list_disks, ls-disks, ls_disks)`. List disks names. -* `mkdir [--recursive] `. +* `mkdir [--recursive] ` on a current disk. Create a directory. Non-recursive by default. -* `read: []` - Read a file from `FROM_PATH` to `TO_PATH` (`stdout` if not supplied). -* `write [FROM_PATH] `. - Write a file from `FROM_PATH` (`stdin` if not supplied) to `TO_PATH`. +* `read (r) [--path-to path]` + Read a file from `path-from` to `path` (`stdout` if not supplied). +* `switch-disk [--path path] ` + Switch to disk `disk` on path `path` (if `path` is not specified default value is a previous path on disk `disk`). +* `write (w) [--path-from path] `. + Write a file from `path` (`stdin` if `path` is not supplied, input must finish by Ctrl+D) to `path-to`. diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index f19643a3fa5..c20e4fc3b09 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -16,7 +16,7 @@ sidebar_label: clickhouse-local While `clickhouse-local` is a great tool for development and testing purposes, and for processing files, it is not suitable for serving end users or applications. In these scenarios, it is recommended to use the open-source [ClickHouse](https://clickhouse.com/docs/en/install). ClickHouse is a powerful OLAP database that is designed to handle large-scale analytical workloads. It provides fast and efficient processing of complex queries on large datasets, making it ideal for use in production environments where high-performance is critical. Additionally, ClickHouse offers a wide range of features such as replication, sharding, and high availability, which are essential for scaling up to handle large datasets and serving applications. If you need to handle larger datasets or serve end users or applications, we recommend using open-source ClickHouse instead of `clickhouse-local`. -Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local CSVs](#query-data-in-a-csv-file-using-sql) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3). +Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local file](#query_data_in_file) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3). ## Download clickhouse-local diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md index 96bf0c5d93b..5056ef2c7aa 100644 --- a/docs/en/sql-reference/aggregate-functions/index.md +++ b/docs/en/sql-reference/aggregate-functions/index.md @@ -18,7 +18,7 @@ ClickHouse also supports: During aggregation, all `NULL` arguments are skipped. If the aggregation has several arguments it will ignore any row in which one or more of them are NULL. -There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases when followed by the modifier `RESPECT NULLS`: `FIRST_VALUE(b) RESPECT NULLS`. +There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases (`any` and `anyLast` respectively) when followed by the modifier `RESPECT NULLS`. For example, `FIRST_VALUE(b) RESPECT NULLS`. **Examples:** diff --git a/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md b/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md new file mode 100644 index 00000000000..fdbfd5b9e41 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md @@ -0,0 +1,37 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/aggthrow +sidebar_position: 101 +--- + +# aggThrow + +This function can be used for the purpose of testing exception safety. It will throw an exception on creation with the specified probability. + +**Syntax** + +```sql +aggThrow(throw_prob) +``` + +**Arguments** + +- `throw_prob` — Probability to throw on creation. [Float64](../../data-types/float.md). + +**Returned value** + +- An exception: `Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully`. + +**Example** + +Query: + +```sql +SELECT number % 2 AS even, aggThrow(number) FROM numbers(10) GROUP BY even; +``` + +Result: + +```response +Received exception: +Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully: While executing AggregatingTransform. (AGGREGATE_FUNCTION_THROW) +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md index cdff7dde4a9..972263585f2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/any.md +++ b/docs/en/sql-reference/aggregate-functions/reference/any.md @@ -5,12 +5,12 @@ sidebar_position: 102 # any -Selects the first encountered value of a column. +Selects the first encountered value of a column, ignoring any `NULL` values. **Syntax** ```sql -any(column) +any(column) [RESPECT NULLS] ``` Aliases: `any_value`, [`first_value`](../reference/first_value.md). @@ -20,7 +20,9 @@ Aliases: `any_value`, [`first_value`](../reference/first_value.md). **Returned value** -By default, it ignores NULL values and returns the first NOT NULL value found in the column. Like [`first_value`](../../../sql-reference/aggregate-functions/reference/first_value.md) it supports `RESPECT NULLS`, in which case it will select the first value passed, independently on whether it's NULL or not. +:::note +Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the first value passed, regardless of whether it is `NULL` or not. +::: :::note The return type of the function is the same as the input, except for LowCardinality which is discarded. This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) ) to modify this behaviour. diff --git a/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md deleted file mode 100644 index 99104a9b8c7..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -slug: /en/sql-reference/aggregate-functions/reference/any_respect_nulls -sidebar_position: 103 ---- - -# any_respect_nulls - -Selects the first encountered value of a column, irregardless of whether it is a `NULL` value or not. - -Alias: `any_value_respect_nulls`, `first_value_repect_nulls`. - -**Syntax** - -```sql -any_respect_nulls(column) -``` - -**Parameters** -- `column`: The column name. - -**Returned value** - -- The last value encountered, irregardless of whether it is a `NULL` value or not. - -**Example** - -Query: - -```sql -CREATE TABLE any_nulls (city Nullable(String)) ENGINE=Log; - -INSERT INTO any_nulls (city) VALUES (NULL), ('Amsterdam'), ('New York'), ('Tokyo'), ('Valencia'), (NULL); - -SELECT any(city), any_respect_nulls(city) FROM any_nulls; -``` - -```response -┌─any(city)─┬─any_respect_nulls(city)─┐ -│ Amsterdam │ ᴺᵁᴸᴸ │ -└───────────┴─────────────────────────┘ -``` - -**See Also** -- [any](../reference/any.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast.md b/docs/en/sql-reference/aggregate-functions/reference/anylast.md index e43bc07fbdc..202d2e9fb10 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast.md @@ -5,17 +5,21 @@ sidebar_position: 105 # anyLast -Selects the last value encountered. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function. +Selects the last value encountered, ignoring any `NULL` values by default. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function. **Syntax** ```sql -anyLast(column) +anyLast(column) [RESPECT NULLS] ``` **Parameters** - `column`: The column name. +:::note +Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the first value passed, regardless of whether it is `NULL` or not. +::: + **Returned value** - The last value encountered. diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md deleted file mode 100644 index 8f093cfdb61..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -slug: /en/sql-reference/aggregate-functions/reference/anylast_respect_nulls -sidebar_position: 106 ---- - -# anyLast_respect_nulls - -Selects the last value encountered, irregardless of whether it is `NULL` or not. - -**Syntax** - -```sql -anyLast_respect_nulls(column) -``` - -**Parameters** -- `column`: The column name. - -**Returned value** - -- The last value encountered, irregardless of whether it is `NULL` or not. - -**Example** - -Query: - -```sql -CREATE TABLE any_last_nulls (city Nullable(String)) ENGINE=Log; - -INSERT INTO any_last_nulls (city) VALUES ('Amsterdam'),(NULL),('New York'),('Tokyo'),('Valencia'),(NULL); - -SELECT anyLast(city), anyLast_respect_nulls(city) FROM any_last_nulls; -``` - -```response -┌─anyLast(city)─┬─anyLast_respect_nulls(city)─┐ -│ Valencia │ ᴺᵁᴸᴸ │ -└───────────────┴─────────────────────────────┘ -``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index e3725b6a430..2dce0afe2e1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -43,11 +43,11 @@ Standard aggregate functions: ClickHouse-specific aggregate functions: +- [aggThrow](../reference/aggthrow.md) - [analysisOfVariance](../reference/analysis_of_variance.md) -- [any](../reference/any_respect_nulls.md) +- [any](../reference/any.md) - [anyHeavy](../reference/anyheavy.md) - [anyLast](../reference/anylast.md) -- [anyLast](../reference/anylast_respect_nulls.md) - [boundingRatio](../reference/boundrat.md) - [first_value](../reference/first_value.md) - [last_value](../reference/last_value.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md index c9c6913249c..73075c0823d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md @@ -5,23 +5,45 @@ sidebar_position: 165 # maxMap -Syntax: `maxMap(key, value)` or `maxMap(Tuple(key, value))` - Calculates the maximum from `value` array according to the keys specified in the `key` array. -Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values. +**Syntax** -The number of elements in `key` and `value` must be the same for each row that is totaled. +```sql +maxMap(key, value) +``` +or +```sql +maxMap(Tuple(key, value)) +``` -Returns a tuple of two arrays: keys and values calculated for the corresponding keys. +Alias: `maxMappedArrays` -Example: +:::note +- Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values. +- The number of elements in `key` and `value` must be the same for each row that is totaled. +::: + +**Parameters** + +- `key` — Array of keys. [Array](../../data-types/array.md). +- `value` — Array of values. [Array](../../data-types/array.md). + +**Returned value** + +- Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. [Tuple](../../data-types/tuple.md)([Array](../../data-types/array.md), [Array](../../data-types/array.md)). + +**Example** + +Query: ``` sql SELECT maxMap(a, b) FROM values('a Array(Char), b Array(Int64)', (['x', 'y'], [2, 2]), (['y', 'z'], [3, 1])) ``` +Result: + ``` text ┌─maxMap(a, b)───────────┐ │ [['x','y','z'],[2,3,1]]│ diff --git a/docs/en/sql-reference/aggregate-functions/reference/minmap.md b/docs/en/sql-reference/aggregate-functions/reference/minmap.md index b1fbb9e49f3..c0f340b3f3f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/minmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/minmap.md @@ -5,23 +5,45 @@ sidebar_position: 169 # minMap -Syntax: `minMap(key, value)` or `minMap(Tuple(key, value))` - Calculates the minimum from `value` array according to the keys specified in the `key` array. -Passing a tuple of keys and value ​​arrays is identical to passing two arrays of keys and values. +**Syntax** -The number of elements in `key` and `value` must be the same for each row that is totaled. +```sql +`minMap(key, value)` +``` +or +```sql +minMap(Tuple(key, value)) +``` -Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. +Alias: `minMappedArrays` -Example: +:::note +- Passing a tuple of keys and value arrays is identical to passing an array of keys and an array of values. +- The number of elements in `key` and `value` must be the same for each row that is totaled. +::: + +**Parameters** + +- `key` — Array of keys. [Array](../../data-types/array.md). +- `value` — Array of values. [Array](../../data-types/array.md). + +**Returned value** + +- Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. [Tuple](../../data-types/tuple.md)([Array](../../data-types/array.md), [Array](../../data-types/array.md)). + +**Example** + +Query: ``` sql SELECT minMap(a, b) FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1])) ``` +Result: + ``` text ┌─minMap(a, b)──────┐ │ ([1,2,3],[2,1,1]) │ diff --git a/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md b/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md index 21344b58ba6..19154c488d9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md +++ b/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md @@ -16,7 +16,7 @@ singleValueOrNull(x) **Parameters** -- `x` — Column of any [data type](../../data-types/index.md). +- `x` — Column of any [data type](../../data-types/index.md) (except [Map](../../data-types/map.md), [Array](../../data-types/array.md) or [Tuple](../../data-types/tuple) which cannot be of type [Nullable](../../data-types/nullable.md)). **Returned values** diff --git a/docs/en/sql-reference/data-types/data-types-binary-encoding.md b/docs/en/sql-reference/data-types/data-types-binary-encoding.md new file mode 100644 index 00000000000..812e946e43e --- /dev/null +++ b/docs/en/sql-reference/data-types/data-types-binary-encoding.md @@ -0,0 +1,115 @@ +--- +slug: /en/sql-reference/data-types/data-types-binary-encoding +sidebar_position: 56 +sidebar_label: Data types binary encoding specification. +--- + + +# Data types binary encoding specification + +This specification describes the binary format that can be used for binary encoding and decoding of ClickHouse data types. This format is used in `Dynamic` column [binary serialization](dynamic.md#binary-output-format) and can be used in input/output formats [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes) and [Native](../../interfaces/formats.md#native) under corresponding settings. + +The table below describes how each data type is represented in binary format. Each data type encoding consist of 1 byte that indicates the type and some optional additional information. +`var_uint` in the binary encoding means that the size is encoded using Variable-Length Quantity compression. + +| ClickHouse data type | Binary encoding | +|--------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `Nothing` | `0x00` | +| `UInt8` | `0x01` | +| `UInt16` | `0x02` | +| `UInt32` | `0x03` | +| `UInt64` | `0x04` | +| `UInt128` | `0x05` | +| `UInt256` | `0x06` | +| `Int8` | `0x07` | +| `Int16` | `0x08` | +| `Int32` | `0x09` | +| `Int64` | `0x0A` | +| `Int128` | `0x0B` | +| `Int256` | `0x0C` | +| `Float32` | `0x0D` | +| `Float64` | `0x0E` | +| `Date` | `0x0F` | +| `Date32` | `0x10` | +| `DateTime` | `0x11` | +| `DateTime(time_zone)` | `0x12` | +| `DateTime64(P)` | `0x13` | +| `DateTime64(P, time_zone)` | `0x14` | +| `String` | `0x15` | +| `FixedString(N)` | `0x16` | +| `Enum8` | `0x17...` | +| `Enum16` | `0x18...>` | +| `Decimal32(P, S)` | `0x19` | +| `Decimal64(P, S)` | `0x1A` | +| `Decimal128(P, S)` | `0x1B` | +| `Decimal256(P, S)` | `0x1C` | +| `UUID` | `0x1D` | +| `Array(T)` | `0x1E` | +| `Tuple(T1, ..., TN)` | `0x1F...` | +| `Tuple(name1 T1, ..., nameN TN)` | `0x20...` | +| `Set` | `0x21` | +| `Interval` | `0x22` (see [interval kind binary encoding](#interval-kind-binary-encoding)) | +| `Nullable(T)` | `0x23` | +| `Function` | `0x24...` | +| `AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x25......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | +| `LowCardinality(T)` | `0x26` | +| `Map(K, V)` | `0x27` | +| `IPv4` | `0x28` | +| `IPv6` | `0x29` | +| `Variant(T1, ..., TN)` | `0x2A...` | +| `Dynamic(max_types=N)` | `0x2B` | +| `Custom type` (`Ring`, `Polygon`, etc) | `0x2C` | +| `Bool` | `0x2D` | +| `SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x2E......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | +| `Nested(name1 T1, ..., nameN TN)` | `0x2F...` | + + +### Interval kind binary encoding + +The table below describes how different interval kinds of `Interval` data type are encoded. + +| Interval kind | Binary encoding | +|---------------|-----------------| +| `Nanosecond` | `0x00` | +| `Microsecond` | `0x01` | +| `Millisecond` | `0x02` | +| `Second` | `0x03` | +| `Minute` | `0x04` | +| `Hour` | `0x05` | +| `Day` | `0x06` | +| `Week` | `0x07` | +| `Month` | `0x08` | +| `Quarter` | `0x09` | +| `Year` | `0x1A` | + +### Aggregate function parameter binary encoding + +The table below describes how parameters of `AggragateFunction` and `SimpleAggregateFunction` are encoded. +The encoding of a parameter consists of 1 byte indicating the type of the parameter and the value itself. + +| Parameter type | Binary encoding | +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| `Null` | `0x00` | +| `UInt64` | `0x01` | +| `Int64` | `0x02` | +| `UInt128` | `0x03` | +| `Int128` | `0x04` | +| `UInt128` | `0x05` | +| `Int128` | `0x06` | +| `Float64` | `0x07` | +| `Decimal32` | `0x08` | +| `Decimal64` | `0x09` | +| `Decimal128` | `0x0A` | +| `Decimal256` | `0x0B` | +| `String` | `0x0C` | +| `Array` | `0x0D...` | +| `Tuple` | `0x0E...` | +| `Map` | `0x0F...` | +| `IPv4` | `0x10` | +| `IPv6` | `0x11` | +| `UUID` | `0x12` | +| `Bool` | `0x13` | +| `Object` | `0x14...` | +| `AggregateFunctionState` | `0x15` | +| `Negative infinity` | `0xFE` | +| `Positive infinity` | `0xFF` | diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index 955fd54e641..8be81471377 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/dynamic -sidebar_position: 56 +sidebar_position: 62 sidebar_label: Dynamic --- @@ -493,3 +493,44 @@ SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) O ``` As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and casted all other types to `String`. + +## JSONExtract functions with Dynamic + +All `JSONExtract*` functions support `Dynamic` type: + +```sql +SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Dynamic') AS dynamic, dynamicType(dynamic) AS dynamic_type; +``` + +```text +┌─dynamic─┬─dynamic_type───────────┐ +│ [1,2,3] │ Array(Nullable(Int64)) │ +└─────────┴────────────────────────┘ +``` + +```sql +SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_dynamics, mapApply((k, v) -> (k, variantType(v)), map_of_dynamics) AS map_of_dynamic_types``` + +```text +┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────┐ +│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │ +└──────────────────────────────────┴─────────────────────────────────────────────────┘ +``` + +```sql +SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS dynamics, arrayMap(x -> (x.1, variantType(x.2)), dynamics) AS dynamic_types``` +``` + +```text +┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────┐ +│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │ +└────────────────────────────────────────┴───────────────────────────────────────────────────────┘ +``` + +### Binary output format + +In RowBinary format values of `Dynamic` type are serialized in the following format: + +```text + +``` diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index 39e37abad82..f2eac12594d 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -1,24 +1,20 @@ --- -slug: /en/sql-reference/data-types/json +slug: /en/sql-reference/data-types/object-data-type sidebar_position: 26 -sidebar_label: JSON +sidebar_label: Object Data Type +keywords: [object, data type] --- -# JSON +# Object Data Type (deprecated) -:::note -This feature is experimental and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. -::: +**This feature is not production-ready and is now deprecated.** If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864). + +
Stores JavaScript Object Notation (JSON) documents in a single column. `JSON` is an alias for `Object('json')`. -:::note -The JSON data type is an obsolete feature. Do not use it. -If you want to use it, set `allow_experimental_object_type = 1`. -::: - ## Example **Example 1** @@ -49,7 +45,7 @@ SELECT o.a, o.b.c, o.b.d[3] FROM json **Example 2** -To be able to create an ordered `MergeTree` family table the sorting key has to be extracted into its column. For example, to insert a file of compressed HTTP access logs in JSON format: +To be able to create an ordered `MergeTree` family table, the sorting key has to be extracted into its column. For example, to insert a file of compressed HTTP access logs in JSON format: ```sql CREATE TABLE logs @@ -69,7 +65,7 @@ FROM file('access.json.gz', JSONAsString) ## Displaying JSON columns -When displaying a `JSON` column ClickHouse only shows the field values by default (because internally, it is represented as a tuple). You can display the field names as well by setting `output_format_json_named_tuples_as_objects = 1`: +When displaying a `JSON` column, ClickHouse only shows the field values by default (because internally, it is represented as a tuple). You can also display the field names by setting `output_format_json_named_tuples_as_objects = 1`: ```sql SET output_format_json_named_tuples_as_objects = 1 @@ -83,4 +79,5 @@ SELECT * FROM json FORMAT JSONEachRow ## Related Content +- [Using JSON in ClickHouse](/docs/en/integrations/data-formats/json) - [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index d87ca4a0fe7..1b52440903d 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -3080,4 +3080,4 @@ Result: ## Distance functions -All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). +All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). \ No newline at end of file diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index a48893b93bf..5ab7e07fcad 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -173,7 +173,7 @@ See function [substring](string-functions.md#substring). ## bitTest -Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. The countdown starts from 0 from the right to the left. +Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. Counting is right-to-left, starting at 0. **Syntax** @@ -226,7 +226,7 @@ Result: ## bitTestAll -Returns result of [logical conjuction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. The countdown starts from 0 from the right to the left. +Returns result of [logical conjuction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. Counting is right-to-left, starting at 0. The conjuction for bit-wise operations: @@ -289,7 +289,7 @@ Result: ## bitTestAny -Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. The countdown starts from 0 from the right to the left. +Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. Counting is right-to-left, starting at 0. The disjunction for bit-wise operations: diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index b532e0de8f0..4f5e5a5d716 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -83,7 +83,57 @@ Result: ``` ## makeDate32 -Like [makeDate](#makedate) but produces a [Date32](../data-types/date32.md). +Creates a date of type [Date32](../../sql-reference/data-types/date32.md) from a year, month, day (or optionally a year and a day). + +**Syntax** + +```sql +makeDate32(year, [month,] day) +``` + +**Arguments** + +- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month (optional). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). + +:::note +If `month` is omitted then `day` should take a value between `1` and `365`, otherwise it should take a value between `1` and `31`. +::: + +**Returned values** + +- A date created from the arguments. [Date32](../../sql-reference/data-types/date32.md). + +**Examples** + +Create a date from a year, month, and day: + +Query: + +```sql +SELECT makeDate32(2024, 1, 1); +``` + +Result: + +```response +2024-01-01 +``` + +Create a Date from a year and day of year: + +Query: + +``` sql +SELECT makeDate32(2024, 100); +``` + +Result: + +```response +2024-04-09 +``` ## makeDateTime @@ -125,12 +175,38 @@ Result: ## makeDateTime64 -Like [makeDateTime](#makedatetime) but produces a [DateTime64](../data-types/datetime64.md). +Creates a [DateTime64](../../sql-reference/data-types/datetime64.md) data type value from its components: year, month, day, hour, minute, second. With optional sub-second precision. **Syntax** +```sql +makeDateTime64(year, month, day, hour, minute, second[, precision]) +``` + +**Arguments** + +- `year` — Year (0-9999). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month (1-12). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day (1-31). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `hour` — Hour (0-23). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `minute` — Minute (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `second` — Second (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `precision` — Optional precision of the sub-second component (0-9). [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- A date and time created from the supplied arguments. [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + ``` sql -makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]]) +SELECT makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5); +``` + +```response +┌─makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5)─┐ +│ 2023-05-15 10:30:45.00779 │ +└─────────────────────────────────────────────────┘ ``` ## timestamp @@ -2622,6 +2698,204 @@ Like function `YYYYMMDDhhmmssToDate()` but produces a [DateTime64](../data-types Accepts an additional, optional `precision` parameter after the `timezone` parameter. +## changeYear + +Changes the year component of a date or date time. + +**Syntax** +``` sql + +changeYear(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- The same type as `date_or_datetime`. + +**Example** + +``` sql +SELECT changeYear(toDate('1999-01-01'), 2000), changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); +``` + +Result: + +``` +┌─changeYear(toDate('1999-01-01'), 2000)─┬─changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000)─┐ +│ 2000-01-01 │ 2000-01-01 00:00:00.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + +## changeMonth + +Changes the month component of a date or date time. + +**Syntax** + +``` sql +changeMonth(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the month. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. + +**Example** + +``` sql +SELECT changeMonth(toDate('1999-01-01'), 2), changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2); +``` + +Result: + +``` +┌─changeMonth(toDate('1999-01-01'), 2)─┬─changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2)─┐ +│ 1999-02-01 │ 1999-02-01 00:00:00.000 │ +└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## changeDay + +Changes the day component of a date or date time. + +**Syntax** + +``` sql +changeDay(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the day. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. + +**Example** + +``` sql +SELECT changeDay(toDate('1999-01-01'), 5), changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5); +``` + +Result: + +``` +┌─changeDay(toDate('1999-01-01'), 5)─┬─changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5)─┐ +│ 1999-01-05 │ 1999-01-05 00:00:00.000 │ +└────────────────────────────────────┴──────────────────────────────────────────────────────────┘ +``` + +## changeHour + +Changes the hour component of a date or date time. + +**Syntax** + +``` sql +changeHour(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. If the input is a [Date](../data-types/date.md), return [DateTime](../data-types/datetime.md). If the input is a [Date32](../data-types/date32.md), return [DateTime64](../data-types/datetime64.md). + +**Example** + +``` sql +SELECT changeHour(toDate('1999-01-01'), 14), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14); +``` + +Result: + +``` +┌─changeHour(toDate('1999-01-01'), 14)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14)─┐ +│ 1999-01-01 14:00:00 │ 1999-01-01 14:00:00.000 │ +└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## changeMinute + +Changes the minute component of a date or date time. + +**Syntax** + +``` sql +changeMinute(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the minute. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. If the input is a [Date](../data-types/date.md), return [DateTime](../data-types/datetime.md). If the input is a [Date32](../data-types/date32.md), return [DateTime64](../data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeMinute(toDate('1999-01-01'), 15), changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15); +``` + +Result: + +``` +┌─changeMinute(toDate('1999-01-01'), 15)─┬─changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ +│ 1999-01-01 00:15:00 │ 1999-01-01 00:15:00.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + +## changeSecond + +Changes the second component of a date or date time. + +**Syntax** + +``` sql +changeSecond(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the second. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. If the input is a [Date](../data-types/date.md), return [DateTime](../data-types/datetime.md). If the input is a [Date32](../data-types/date32.md), return [DateTime64](../data-types/datetime64.md). + +**Example** + +``` sql +SELECT changeSecond(toDate('1999-01-01'), 15), changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15); +``` + +Result: + +``` +┌─changeSecond(toDate('1999-01-01'), 15)─┬─changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ +│ 1999-01-01 00:00:15 │ 1999-01-01 00:00:15.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + ## addYears Adds a specified number of years to a date, a date with time or a string-encoded date / date with time. @@ -2638,6 +2912,7 @@ addYears(date, num) - `num`: Number of years to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` years. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2675,6 +2950,7 @@ addQuarters(date, num) - `num`: Number of quarters to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` quarters. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2712,6 +2988,7 @@ addMonths(date, num) - `num`: Number of months to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` months. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2749,6 +3026,7 @@ addWeeks(date, num) - `num`: Number of weeks to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` weeks. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2786,6 +3064,7 @@ addDays(date, num) - `num`: Number of days to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` days. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2823,6 +3102,7 @@ addHours(date, num) - `num`: Number of hours to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** +o - Returns `date` plus `num` hours. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2860,6 +3140,7 @@ addMinutes(date, num) - `num`: Number of minutes to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` minutes. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2897,6 +3178,7 @@ addSeconds(date, num) - `num`: Number of seconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` seconds. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2934,6 +3216,7 @@ addMilliseconds(date_time, num) - `num`: Number of milliseconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` plus `num` milliseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -2969,6 +3252,7 @@ addMicroseconds(date_time, num) - `num`: Number of microseconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` plus `num` microseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3004,6 +3288,7 @@ addNanoseconds(date_time, num) - `num`: Number of nanoseconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` plus `num` nanoseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3039,6 +3324,7 @@ addInterval(interval_1, interval_2) - `interval_2`: Second interval to be added. [interval](../data-types/special-data-types/interval.md). **Returned value** + - Returns a tuple of intervals. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). :::note @@ -3085,6 +3371,7 @@ addTupleOfIntervals(interval_1, interval_2) - `intervals`: Tuple of intervals to add to `date`. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). **Returned value** + - Returns `date` with added `intervals`. [date](../data-types/date.md)/[date32](../data-types/date32.md)/[datetime](../data-types/datetime.md)/[datetime64](../data-types/datetime64.md). **Example** @@ -3119,6 +3406,7 @@ subtractYears(date, num) - `num`: Number of years to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` years. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3156,6 +3444,7 @@ subtractQuarters(date, num) - `num`: Number of quarters to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` quarters. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3193,6 +3482,7 @@ subtractMonths(date, num) - `num`: Number of months to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` months. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3230,6 +3520,7 @@ subtractWeeks(date, num) - `num`: Number of weeks to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` weeks. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3267,6 +3558,7 @@ subtractDays(date, num) - `num`: Number of days to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` days. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3304,6 +3596,7 @@ subtractHours(date, num) - `num`: Number of hours to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` hours. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[Datetime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3341,6 +3634,7 @@ subtractMinutes(date, num) - `num`: Number of minutes to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` minutes. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3378,6 +3672,7 @@ subtractSeconds(date, num) - `num`: Number of seconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` seconds. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3415,6 +3710,7 @@ subtractMilliseconds(date_time, num) - `num`: Number of milliseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` minus `num` milliseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3450,6 +3746,7 @@ subtractMicroseconds(date_time, num) - `num`: Number of microseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` minus `num` microseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3485,6 +3782,7 @@ subtractNanoseconds(date_time, num) - `num`: Number of nanoseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` minus `num` nanoseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3520,6 +3818,7 @@ subtractInterval(interval_1, interval_2) - `interval_2`: Second interval to be negated. [interval](../data-types/special-data-types/interval.md). **Returned value** + - Returns a tuple of intervals. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). :::note @@ -3566,6 +3865,7 @@ subtractTupleOfIntervals(interval_1, interval_2) - `intervals`: Tuple of intervals to subtract from `date`. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). **Returned value** + - Returns `date` with subtracted `intervals`. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index e431ed75465..7c977e7d6dc 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -314,10 +314,71 @@ SELECT groupBitXor(cityHash64(*)) FROM table Calculates a 32-bit hash code from any type of integer. This is a relatively fast non-cryptographic hash function of average quality for numbers. +**Syntax** + +```sql +intHash32(int) +``` + +**Arguments** + +- `int` — Integer to hash. [(U)Int*](../data-types/int-uint.md). + +**Returned value** + +- 32-bit hash code. [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT intHash32(42); +``` + +Result: + +```response +┌─intHash32(42)─┐ +│ 1228623923 │ +└───────────────┘ +``` + ## intHash64 Calculates a 64-bit hash code from any type of integer. -It works faster than intHash32. Average quality. +This is a relatively fast non-cryptographic hash function of average quality for numbers. +It works faster than [intHash32](#inthash32). + +**Syntax** + +```sql +intHash64(int) +``` + +**Arguments** + +- `int` — Integer to hash. [(U)Int*](../data-types/int-uint.md). + +**Returned value** + +- 64-bit hash code. [UInt64](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT intHash64(42); +``` + +Result: + +```response +┌────────intHash64(42)─┐ +│ 11490350930367293593 │ +└──────────────────────┘ +``` ## SHA1, SHA224, SHA256, SHA512, SHA512_256 diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 58fc1eba02e..260457b3be1 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -86,7 +86,7 @@ Returns the fully qualified domain name of the ClickHouse server. fqdn(); ``` -This function is case-insensitive. +Aliases: `fullHostName`, 'FQDN'. **Returned value** @@ -2984,6 +2984,66 @@ Result: └─────────┘ ``` +## partitionID + +Computes the [partition ID](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). + +:::note +This function is slow and should not be called for large amount of rows. +::: + +**Syntax** + +```sql +partitionID(x[, y, ...]); +``` + +**Arguments** + +- `x` — Column for which to return the partition ID. +- `y, ...` — Remaining N columns for which to return the partition ID (optional). + +**Returned Value** + +- Partition ID that the row would belong to. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + i int, + j int +) +ENGINE = MergeTree +PARTITION BY i +ORDER BY tuple(); + +INSERT INTO tab VALUES (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); + +SELECT i, j, partitionID(i), _partition_id FROM tab ORDER BY i, j; +``` + +Result: + +```response +┌─i─┬─j─┬─partitionID(i)─┬─_partition_id─┐ +│ 1 │ 1 │ 1 │ 1 │ +│ 1 │ 2 │ 1 │ 1 │ +│ 1 │ 3 │ 1 │ 1 │ +└───┴───┴────────────────┴───────────────┘ +┌─i─┬─j─┬─partitionID(i)─┬─_partition_id─┐ +│ 2 │ 4 │ 2 │ 2 │ +│ 2 │ 5 │ 2 │ 2 │ +│ 2 │ 6 │ 2 │ 2 │ +└───┴───┴────────────────┴───────────────┘ +``` + + ## shardNum Returns the index of a shard which processes a part of data in a distributed query. Indices are started from `1`. @@ -3860,3 +3920,138 @@ Result: └───────────────┘ ``` +## transactionID + +Returns the ID of a [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback). + +:::note +This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration: + +``` + + 1 + +``` + +For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback). +::: + +**Syntax** + +```sql +transactionID() +``` + +**Returned value** + +- Returns a tuple consisting of `start_csn`, `local_tid` and `host_id`. [Tuple](../data-types/tuple.md). + +- `start_csn`: Global sequential number, the newest commit timestamp that was seen when this transaction began. [UInt64](../data-types/int-uint.md). +- `local_tid`: Local sequential number that is unique for each transaction started by this host within a specific start_csn. [UInt64](../data-types/int-uint.md). +- `host_id`: UUID of the host that has started this transaction. [UUID](../data-types/uuid.md). + +**Example** + +Query: + +```sql +BEGIN TRANSACTION; +SELECT transactionID(); +ROLLBACK; +``` + +Result: + +```response +┌─transactionID()────────────────────────────────┐ +│ (32,34,'0ee8b069-f2bb-4748-9eae-069c85b5252b') │ +└────────────────────────────────────────────────┘ +``` + +## transactionLatestSnapshot + +Returns the newest snapshot (Commit Sequence Number) of a [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback) that is available for reading. + +:::note +This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration: + +``` + + 1 + +``` + +For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback). +::: + +**Syntax** + +```sql +transactionLatestSnapshot() +``` + +**Returned value** + +- Returns the latest snapshot (CSN) of a transaction. [UInt64](../data-types/int-uint.md) + +**Example** + +Query: + +```sql +BEGIN TRANSACTION; +SELECT transactionLatestSnapshot(); +ROLLBACK; +``` + +Result: + +```response +┌─transactionLatestSnapshot()─┐ +│ 32 │ +└─────────────────────────────┘ +``` + +## transactionOldestSnapshot + +Returns the oldest snapshot (Commit Sequence Number) that is visible for some running [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback). + +:::note +This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration: + +``` + + 1 + +``` + +For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback). +::: + +**Syntax** + +```sql +transactionOldestSnapshot() +``` + +**Returned value** + +- Returns the oldest snapshot (CSN) of a transaction. [UInt64](../data-types/int-uint.md) + +**Example** + +Query: + +```sql +BEGIN TRANSACTION; +SELECT transactionLatestSnapshot(); +ROLLBACK; +``` + +Result: + +```response +┌─transactionOldestSnapshot()─┐ +│ 32 │ +└─────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 894b9026165..e2f1b8c7f14 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -12,9 +12,7 @@ Functions for [searching](string-search-functions.md) in strings and for [replac ## empty -Checks whether the input string is empty. - -A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. +Checks whether the input string is empty. A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. The function is also available for [arrays](array-functions.md#function-empty) and [UUIDs](uuid-functions.md#empty). @@ -48,9 +46,7 @@ Result: ## notEmpty -Checks whether the input string is non-empty. - -A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. +Checks whether the input string is non-empty. A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. The function is also available for [arrays](array-functions.md#function-notempty) and [UUIDs](uuid-functions.md#notempty). @@ -96,7 +92,7 @@ length(s) **Parameters** -- `s`: An input string or array. [String](../data-types/string)/[Array](../data-types/array). +- `s` — An input string or array. [String](../data-types/string)/[Array](../data-types/array). **Returned value** @@ -149,7 +145,7 @@ lengthUTF8(s) **Parameters** -- `s`: String containing valid UTF-8 encoded text. [String](../data-types/string). +- `s` — String containing valid UTF-8 encoded text. [String](../data-types/string). **Returned value** @@ -183,8 +179,8 @@ left(s, offset) **Parameters** -- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). +- `s` — The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -230,8 +226,8 @@ leftUTF8(s, offset) **Parameters** -- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). +- `s` — The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -347,8 +343,8 @@ right(s, offset) **Parameters** -- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). +- `s` — The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -394,8 +390,8 @@ rightUTF8(s, offset) **Parameters** -- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). +- `s` — The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -547,7 +543,7 @@ Alias: `ucase` **Parameters** -- `input`: A string type [String](../data-types/string.md). +- `input` — A string type [String](../data-types/string.md). **Returned value** @@ -571,16 +567,47 @@ SELECT upper('clickhouse'); Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). +:::note +Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point (such as `ẞ` and `ß`), the result may be incorrect for this code point. +::: -If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. +**Syntax** + +```sql +lowerUTF8(input) +``` + +**Parameters** + +- `input` — A string type [String](../data-types/string.md). + +**Returned value** + +- A [String](../data-types/string.md) data type value. + +**Example** + +Query: + +``` sql +SELECT lowerUTF8('MÜNCHEN') as Lowerutf8; +``` + +Result: + +``` response +┌─Lowerutf8─┐ +│ münchen │ +└───────────┘ +``` ## upperUTF8 Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. - -If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. +:::note +Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point (such as `ẞ` and `ß`), the result may be incorrect for this code point. +::: **Syntax** @@ -590,7 +617,7 @@ upperUTF8(input) **Parameters** -- `input`: A string type [String](../data-types/string.md). +- `input` — A string type [String](../data-types/string.md). **Returned value** @@ -604,6 +631,8 @@ Query: SELECT upperUTF8('München') as Upperutf8; ``` +Result: + ``` response ┌─Upperutf8─┐ │ MÜNCHEN │ @@ -614,6 +643,34 @@ SELECT upperUTF8('München') as Upperutf8; Returns 1, if the set of bytes constitutes valid UTF-8-encoded text, otherwise 0. +**Syntax** + +``` sql +isValidUTF8(input) +``` + +**Parameters** + +- `input` — A string type [String](../data-types/string.md). + +**Returned value** + +- Returns `1`, if the set of bytes constitutes valid UTF-8-encoded text, otherwise `0`. + +Query: + +``` sql +SELECT isValidUTF8('\xc3\xb1') AS valid, isValidUTF8('\xc3\x28') AS invalid; +``` + +Result: + +``` response +┌─valid─┬─invalid─┐ +│ 1 │ 0 │ +└───────┴─────────┘ +``` + ## toValidUTF8 Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. All running in a row invalid characters are collapsed into the one replacement character. @@ -883,7 +940,7 @@ Returns the substring of a string `s` which starts at the specified byte index ` substring(s, offset[, length]) ``` -Alias: +Aliases: - `substr` - `mid` - `byteSlice` @@ -926,9 +983,9 @@ substringUTF8(s, offset[, length]) **Arguments** -- `s`: The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md) -- `offset`: The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md). -- `length`: The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional. +- `s` — The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md) +- `offset` — The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md). +- `length` — The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional. **Returned value** @@ -964,9 +1021,9 @@ Alias: `SUBSTRING_INDEX` **Arguments** -- s: The string to extract substring from. [String](../data-types/string.md). -- delim: The character to split. [String](../data-types/string.md). -- count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) +- s — The string to extract substring from. [String](../data-types/string.md). +- delim — The character to split. [String](../data-types/string.md). +- count — The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) **Example** @@ -995,9 +1052,9 @@ substringIndexUTF8(s, delim, count) **Arguments** -- `s`: The string to extract substring from. [String](../data-types/string.md). -- `delim`: The character to split. [String](../data-types/string.md). -- `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) +- `s` — The string to extract substring from. [String](../data-types/string.md). +- `delim` — The character to split. [String](../data-types/string.md). +- `count` — The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) **Returned value** @@ -1277,7 +1334,7 @@ tryBase64Decode(encoded) **Arguments** -- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string. +- `encoded` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string. **Returned value** @@ -1309,7 +1366,7 @@ tryBase64URLDecode(encodedUrl) **Parameters** -- `encodedURL`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string. +- `encodedURL` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string. **Returned value** @@ -1555,7 +1612,7 @@ The result type is UInt64. ## normalizeQuery -Replaces literals, sequences of literals and complex aliases with placeholders. +Replaces literals, sequences of literals and complex aliases (containing whitespace, more than two digits or at least 36 bytes long such as UUIDs) with placeholder `?`. **Syntax** @@ -1573,6 +1630,8 @@ normalizeQuery(x) **Example** +Query: + ``` sql SELECT normalizeQuery('[1, 2, 3, x]') AS query; ``` @@ -1585,9 +1644,44 @@ Result: └──────────┘ ``` +## normalizeQueryKeepNames + +Replaces literals, sequences of literals with placeholder `?` but does not replace complex aliases (containing whitespace, more than two digits +or at least 36 bytes long such as UUIDs). This helps better analyze complex query logs. + +**Syntax** + +``` sql +normalizeQueryKeepNames(x) +``` + +**Arguments** + +- `x` — Sequence of characters. [String](../data-types/string.md). + +**Returned value** + +- Sequence of characters with placeholders. [String](../data-types/string.md). + +**Example** + +Query: + +``` sql +SELECT normalizeQuery('SELECT 1 AS aComplexName123'), normalizeQueryKeepNames('SELECT 1 AS aComplexName123'); +``` + +Result: + +```result +┌─normalizeQuery('SELECT 1 AS aComplexName123')─┬─normalizeQueryKeepNames('SELECT 1 AS aComplexName123')─┐ +│ SELECT ? AS `?` │ SELECT ? AS aComplexName123 │ +└───────────────────────────────────────────────┴────────────────────────────────────────────────────────┘ +``` + ## normalizedQueryHash -Returns identical 64bit hash values without the values of literals for similar queries. Can be helpful to analyze query log. +Returns identical 64bit hash values without the values of literals for similar queries. Can be helpful to analyze query logs. **Syntax** @@ -1605,6 +1699,8 @@ normalizedQueryHash(x) **Example** +Query: + ``` sql SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 AS `abc`') AS res; ``` @@ -1617,6 +1713,43 @@ Result: └─────┘ ``` +## normalizedQueryHashKeepNames + +Like [normalizedQueryHash](#normalizedqueryhash) it returns identical 64bit hash values without the values of literals for similar queries but it does not replace complex aliases (containing whitespace, more than two digits +or at least 36 bytes long such as UUIDs) with a placeholder before hashing. Can be helpful to analyze query logs. + +**Syntax** + +``` sql +normalizedQueryHashKeepNames(x) +``` + +**Arguments** + +- `x` — Sequence of characters. [String](../data-types/string.md). + +**Returned value** + +- Hash value. [UInt64](../data-types/int-uint.md#uint-ranges). + +**Example** + +``` sql +SELECT normalizedQueryHash('SELECT 1 AS `xyz123`') != normalizedQueryHash('SELECT 1 AS `abc123`') AS normalizedQueryHash; +SELECT normalizedQueryHashKeepNames('SELECT 1 AS `xyz123`') != normalizedQueryHashKeepNames('SELECT 1 AS `abc123`') AS normalizedQueryHashKeepNames; +``` + +Result: + +```result +┌─normalizedQueryHash─┐ +│ 0 │ +└─────────────────────┘ +┌─normalizedQueryHashKeepNames─┐ +│ 1 │ +└──────────────────────────────┘ +``` + ## normalizeUTF8NFC Converts a string to [NFC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string is valid UTF8-encoded text. @@ -1935,7 +2068,7 @@ soundex(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. [String](../data-types/string.md) **Returned value** @@ -1968,7 +2101,7 @@ punycodeEncode(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. [String](../data-types/string.md) **Returned value** @@ -2001,7 +2134,7 @@ punycodeEncode(val) **Arguments** -- `val` - Punycode-encoded string. [String](../data-types/string.md) +- `val` — Punycode-encoded string. [String](../data-types/string.md) **Returned value** @@ -2027,7 +2160,7 @@ Like `punycodeDecode` but returns an empty string if no valid Punycode-encoded s ## idnaEncode -Returns the the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism. +Returns the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism. The input string must be UTF-encoded and translatable to an ASCII string, otherwise an exception is thrown. Note: No percent decoding or trimming of tabs, spaces or control characters is performed. @@ -2039,7 +2172,7 @@ idnaEncode(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. [String](../data-types/string.md) **Returned value** @@ -2065,7 +2198,7 @@ Like `idnaEncode` but returns an empty string in case of an error instead of thr ## idnaDecode -Returns the the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism. +Returns the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism. In case of an error (e.g. because the input is invalid), the input string is returned. Note that repeated application of `idnaEncode()` and `idnaDecode()` does not necessarily return the original string due to case normalization. @@ -2077,7 +2210,7 @@ idnaDecode(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. [String](../data-types/string.md) **Returned value** @@ -2121,7 +2254,7 @@ Result: └───────────────────────────────────────────┘ ``` -Alias: mismatches +Alias: `mismatches` ## stringJaccardIndex @@ -2175,7 +2308,7 @@ Result: └─────────────────────────────────────┘ ``` -Alias: levenshteinDistance +Alias: `levenshteinDistance` ## editDistanceUTF8 @@ -2201,7 +2334,7 @@ Result: └─────────────────────────────────────┘ ``` -Alias: levenshteinDistanceUTF8 +Alias: `levenshteinDistanceUTF8` ## damerauLevenshteinDistance @@ -2279,13 +2412,93 @@ Result: Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. +:::note +Because `initCap` converts only the first letter of each word to upper case you may observe unexpected behaviour for words containing apostrophes or capital letters. For example: + +```sql +SELECT initCap('mother''s daughter'), initCap('joe McAdam'); +``` + +will return + +```response +┌─initCap('mother\'s daughter')─┬─initCap('joe McAdam')─┐ +│ Mother'S Daughter │ Joe Mcadam │ +└───────────────────────────────┴───────────────────────┘ +``` + +This is a known behaviour, with no plans currently to fix it. +::: + +**Syntax** + +```sql +initcap(val) +``` + +**Arguments** + +- `val` — Input value. [String](../data-types/string.md). + +**Returned value** + +- `val` with the first letter of each word converted to upper case. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +SELECT initcap('building for fast'); +``` + +Result: + +```text +┌─initcap('building for fast')─┐ +│ Building For Fast │ +└──────────────────────────────┘ +``` + ## initcapUTF8 -Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. - -Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). +Like [initcap](#initcap), `initcapUTF8` converts the first letter of each word to upper case and the rest to lower case. Assumes that the string contains valid UTF-8 encoded text. +If this assumption is violated, no exception is thrown and the result is undefined. +:::note +This function does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. +::: + +**Syntax** + +```sql +initcapUTF8(val) +``` + +**Arguments** + +- `val` — Input value. [String](../data-types/string.md). + +**Returned value** + +- `val` with the first letter of each word converted to upper case. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +SELECT initcapUTF8('не тормозит'); +``` + +Result: + +```text +┌─initcapUTF8('не тормозит')─┐ +│ Не Тормозит │ +└────────────────────────────┘ +``` ## firstLine @@ -2299,7 +2512,7 @@ firstLine(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. [String](../data-types/string.md) **Returned value** diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 7aeb1f5b2a7..8793ebdd1a3 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -34,7 +34,7 @@ Alias: `replace`. Replaces the first occurrence of the substring matching the regular expression `pattern` (in [re2 syntax](https://github.com/google/re2/wiki/Syntax)) in `haystack` by the `replacement` string. -`replacement` can containing substitutions `\0-\9`. +`replacement` can contain substitutions `\0-\9`. Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match. To use a verbatim `\` character in the `pattern` or `replacement` strings, escape it using `\`. diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index 2cec1987c20..5169d4487ec 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -6,44 +6,122 @@ sidebar_label: Time Window # Time Window Functions -Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with WindowView are listed below: +Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with [WindowView](../statements/create/view.md/#window-view-experimental) are listed below: ## tumble A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`). +**Syntax** + ``` sql tumble(time_attr, interval [, timezone]) ``` **Arguments** -- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. -- `interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. +- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)). **Example** Query: ``` sql -SELECT tumble(now(), toIntervalDay('1')) +SELECT tumble(now(), toIntervalDay('1')); ``` Result: ``` text ┌─tumble(now(), toIntervalDay('1'))─────────────┐ -│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │ +│ ('2024-07-04 00:00:00','2024-07-05 00:00:00') │ └───────────────────────────────────────────────┘ ``` +## tumbleStart + +Returns the inclusive lower bound of the corresponding [tumbling window](#tumble). + +**Syntax** + +``` sql +tumbleStart(time_attr, interval [, timezone]); +``` + +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The inclusive lower bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT tumbleStart(now(), toIntervalDay('1')); +``` + +Result: + +```response +┌─tumbleStart(now(), toIntervalDay('1'))─┐ +│ 2024-07-04 00:00:00 │ +└────────────────────────────────────────┘ +``` + +## tumbleEnd + +Returns the exclusive upper bound of the corresponding [tumbling window](#tumble). + +**Syntax** + +``` sql +tumbleEnd(time_attr, interval [, timezone]); +``` + +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The inclusive lower bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT tumbleEnd(now(), toIntervalDay('1')); +``` + +Result: + +```response +┌─tumbleEnd(now(), toIntervalDay('1'))─┐ +│ 2024-07-05 00:00:00 │ +└──────────────────────────────────────┘ +``` + ## hop -A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. +A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. ``` sql hop(time_attr, hop_interval, window_interval [, timezone]) @@ -51,65 +129,118 @@ hop(time_attr, hop_interval, window_interval [, timezone]) **Arguments** -- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. -- `hop_interval` - Hop interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. -- `window_interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. +- The inclusive lower and exclusive upper bound of the corresponding hopping window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. + +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. +::: **Example** Query: ``` sql -SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND) +SELECT hop(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); ``` Result: ``` text -┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐ -│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │ -└───────────────────────────────────────────────────────────┘ -``` - -## tumbleStart - -Returns the inclusive lower bound of the corresponding tumbling window. - -``` sql -tumbleStart(bounds_tuple); -tumbleStart(time_attr, interval [, timezone]); -``` - -## tumbleEnd - -Returns the exclusive upper bound of the corresponding tumbling window. - -``` sql -tumbleEnd(bounds_tuple); -tumbleEnd(time_attr, interval [, timezone]); +┌─hop(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ ('2024-07-03 00:00:00','2024-07-05 00:00:00') │ +└────────────────────────────────────────────────────┘ ``` ## hopStart -Returns the inclusive lower bound of the corresponding hopping window. +Returns the inclusive lower bound of the corresponding [hopping window](#hop). + +**Syntax** ``` sql -hopStart(bounds_tuple); hopStart(time_attr, hop_interval, window_interval [, timezone]); ``` +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The inclusive lower bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. +::: + +**Example** + +Query: + +``` sql +SELECT hopStart(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); +``` + +Result: + +``` text +┌─hopStart(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ 2024-07-03 00:00:00 │ +└─────────────────────────────────────────────────────────┘ +``` ## hopEnd -Returns the exclusive upper bound of the corresponding hopping window. +Returns the exclusive upper bound of the corresponding [hopping window](#hop). + +**Syntax** ``` sql -hopEnd(bounds_tuple); hopEnd(time_attr, hop_interval, window_interval [, timezone]); +``` +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The exclusive upper bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. +::: + +**Example** + +Query: + +``` sql +SELECT hopEnd(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); +``` + +Result: + +``` text +┌─hopEnd(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ 2024-07-05 00:00:00 │ +└───────────────────────────────────────────────────────┘ + ``` ## Related content diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 0663be08240..3b4d68e44b2 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -7,7 +7,7 @@ sidebar_label: Tuples ## tuple A function that allows grouping multiple columns. -For columns with the types T1, T2, ..., it returns a Tuple(T1, T2, ...) type tuple containing these columns. There is no cost to execute the function. +For columns C1, C2, ... with the types T1, T2, ..., it returns a named Tuple(C1 T1, C2 T2, ...) type tuple containing these columns if their names are unique and can be treated as unquoted identifiers, otherwise a Tuple(T1, T2, ...) is returned. There is no cost to execute the function. Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table. The function implements the operator `(x, y, ...)`. @@ -259,6 +259,60 @@ Result: └───────────────────────────────────────┘ ``` +## tupleNames + +Converts a tuple into an array of column names. For a tuple in the form `Tuple(a T, b T, ...)`, it returns an array of strings representing the named columns of the tuple. If the tuple elements do not have explicit names, their indices will be used as the column names instead. + +**Syntax** + +``` sql +tupleNames(tuple) +``` + +**Arguments** + +- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values. + +**Returned value** + +- An array with strings. + +Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)). + +**Example** + +Query: + +``` sql +CREATE TABLE tupletest (col Tuple(user_ID UInt64, session_ID UInt64)) ENGINE = Memory; + +INSERT INTO tupletest VALUES (tuple(1, 2)); + +SELECT tupleNames(col) FROM tupletest; +``` + +Result: + +``` text +┌─tupleNames(col)──────────┐ +│ ['user_ID','session_ID'] │ +└──────────────────────────┘ +``` + +If you pass a simple tuple to the function, ClickHouse uses the indexes of the columns as their names: + +``` sql +SELECT tupleNames(tuple(3, 2, 1)); +``` + +Result: + +``` text +┌─tupleNames((3, 2, 1))─┐ +│ ['1','2','3'] │ +└───────────────────────┘ +``` + ## tuplePlus Calculates the sum of corresponding values of two tuples of the same size. diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index ad40725d680..24b356eca87 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -600,7 +600,7 @@ mapApply(func, map) **Arguments** -- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `func` — [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). - `map` — [Map](../data-types/map.md). **Returned value** @@ -831,7 +831,39 @@ SELECT mapSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; └──────────────────────────────┘ ``` -For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-sort) for `arraySort` function. +For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-sort) for `arraySort` function. + +## mapPartialSort + +Sorts the elements of a map in ascending order with additional `limit` argument allowing partial sorting. +If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. + +**Syntax** + +```sql +mapPartialSort([func,] limit, map) +``` +**Arguments** + +- `func` – Optional function to apply to the keys and values of the map. [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `limit` – Elements in range [1..limit] are sorted. [(U)Int](../data-types/int-uint.md). +- `map` – Map to sort. [Map](../data-types/map.md). + +**Returned value** + +- Partially sorted map. [Map](../data-types/map.md). + +**Example** + +``` sql +SELECT mapPartialSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2)); +``` + +``` text +┌─mapPartialSort(lambda(tuple(k, v), v), 2, map('k1', 3, 'k2', 1, 'k3', 2))─┐ +│ {'k2':1,'k3':2,'k1':3} │ +└───────────────────────────────────────────────────────────────────────────┘ +``` ## mapReverseSort(\[func,\], map) @@ -861,3 +893,35 @@ SELECT mapReverseSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; ``` For more details see function [arrayReverseSort](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort). + +## mapPartialReverseSort + +Sorts the elements of a map in descending order with additional `limit` argument allowing partial sorting. +If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. + +**Syntax** + +```sql +mapPartialReverseSort([func,] limit, map) +``` +**Arguments** + +- `func` – Optional function to apply to the keys and values of the map. [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `limit` – Elements in range [1..limit] are sorted. [(U)Int](../data-types/int-uint.md). +- `map` – Map to sort. [Map](../data-types/map.md). + +**Returned value** + +- Partially sorted map. [Map](../data-types/map.md). + +**Example** + +``` sql +SELECT mapPartialReverseSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2)); +``` + +``` text +┌─mapPartialReverseSort(lambda(tuple(k, v), v), 2, map('k1', 3, 'k2', 1, 'k3', 2))─┐ +│ {'k1':3,'k3':2,'k2':1} │ +└──────────────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/statements/alter/statistics.md b/docs/en/sql-reference/statements/alter/statistics.md index 80024781f88..6880cef0e5c 100644 --- a/docs/en/sql-reference/statements/alter/statistics.md +++ b/docs/en/sql-reference/statements/alter/statistics.md @@ -28,6 +28,6 @@ There is an example adding two statistics types to two columns: ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq; ``` -:::note +:::note Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). ::: diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 0253bc647e6..b866d0b9f5f 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -152,7 +152,7 @@ SELECT * FROM test; `MATERIALIZED expr` -Materialized expression. Values of such columns are always calculated, they cannot be specified in INSERT queries. +Materialized expression. Values of such columns are automatically calculated according to the specified materialized expression when rows are inserted. Values cannot be explicitly specified during `INSERT`s. Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with setting `asterisk_include_materialized_columns`. diff --git a/docs/en/sql-reference/statements/kill.md b/docs/en/sql-reference/statements/kill.md index b665ad85a09..6e18ace10c7 100644 --- a/docs/en/sql-reference/statements/kill.md +++ b/docs/en/sql-reference/statements/kill.md @@ -58,6 +58,8 @@ KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90' KILL QUERY WHERE user='username' SYNC ``` +:::tip If you are killing a query in ClickHouse Cloud or in a self-managed cluster, then be sure to use the ```ON CLUSTER [cluster-name]``` option, in order to ensure the query is killed on all replicas::: + Read-only users can only stop their own queries. By default, the asynchronous version of queries is used (`ASYNC`), which does not wait for confirmation that queries have stopped. @@ -131,6 +133,7 @@ KILL MUTATION WHERE database = 'default' AND table = 'table' -- Cancel the specific mutation: KILL MUTATION WHERE database = 'default' AND table = 'table' AND mutation_id = 'mutation_3.txt' ``` +:::tip If you are killing a mutation in ClickHouse Cloud or in a self-managed cluster, then be sure to use the ```ON CLUSTER [cluster-name]``` option, in order to ensure the mutation is killed on all replicas::: The query is useful when a mutation is stuck and cannot finish (e.g. if some function in the mutation query throws an exception when applied to the data contained in the table). diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 3a3162dad9a..44b1b50620a 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -130,7 +130,9 @@ SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv'); ## Globs in path -Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix. +Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix. There is one exception that if the path refers to an existing +directory and does not use globs, a `*` will be implicitly added to the path so +all the files in the directory are selected. - `*` — Represents arbitrarily many characters except `/` but including the empty string. - `?` — Represents an arbitrary single character. @@ -163,6 +165,12 @@ An alternative path expression which achieves the same: SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32'); ``` +Query the total number of rows in `some_dir` using the implicit `*`: + +```sql +SELECT count(*) FROM file('some_dir', 'TSV', 'name String, value UInt32'); +``` + :::note If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: diff --git a/docs/en/sql-reference/table-functions/fuzzQuery.md b/docs/en/sql-reference/table-functions/fuzzQuery.md new file mode 100644 index 00000000000..e15f8a40156 --- /dev/null +++ b/docs/en/sql-reference/table-functions/fuzzQuery.md @@ -0,0 +1,36 @@ +--- +slug: /en/sql-reference/table-functions/fuzzQuery +sidebar_position: 75 +sidebar_label: fuzzQuery +--- + +# fuzzQuery + +Perturbs the given query string with random variations. + +``` sql +fuzzQuery(query[, max_query_length[, random_seed]]) +``` + +**Arguments** + +- `query` (String) - The source query to perform the fuzzing on. +- `max_query_length` (UInt64) - A maximum length the query can get during the fuzzing process. +- `random_seed` (UInt64) - A random seed for producing stable results. + +**Returned Value** + +A table object with a single column containing perturbed query strings. + +## Usage Example + +``` sql +SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2; +``` + +``` + ┌─query──────────────────────────────────────────────────────────┐ +1. │ SELECT 'a' AS key GROUP BY key │ +2. │ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │ + └────────────────────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 1a7e2b8d66a..35e5d86034c 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -269,9 +269,9 @@ FROM s3( ## Virtual Columns {#virtual-columns} -- `_path` — Path to the file. Type: `LowCardinalty(String)`. -- `_file` — Name of the file. Type: `LowCardinalty(String)`. -- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. +- `_path` — Path to the file. Type: `LowCardinalty(String)`. In case of archive, shows path in a format: "{path_to_archive}::{path_to_file_inside_archive}" +- `_file` — Name of the file. Type: `LowCardinalty(String)`. In case of archive shows name of the file inside the archive. +- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In case of archive shows uncompressed file size of the file inside the archive. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. ## Storage Settings {#storage-settings} diff --git a/docs/en/sql-reference/window-functions/dense_rank.md b/docs/en/sql-reference/window-functions/dense_rank.md new file mode 100644 index 00000000000..d6445b68c55 --- /dev/null +++ b/docs/en/sql-reference/window-functions/dense_rank.md @@ -0,0 +1,73 @@ +--- +slug: /en/sql-reference/window-functions/dense_rank +sidebar_label: dense_rank +sidebar_position: 7 +--- + +# dense_rank + +Ranks the current row within its partition without gaps. In other words, if the value of any new row encountered is equal to the value of one of the previous rows then it will receive the next successive rank without any gaps in ranking. + +The [rank](./rank.md) function provides the same behaviour, but with gaps in ranking. + +**Syntax** + +```sql +dense_rank (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- A number for the current row within its partition, without gaps in ranking. [UInt64](../data-types/int-uint.md). + +**Example** + +The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA). + +Query: + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + dense_rank() OVER (ORDER BY salary DESC) AS dense_rank +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─dense_rank─┐ +1. │ Gary Chen │ 195000 │ 1 │ +2. │ Robert George │ 195000 │ 1 │ +3. │ Charles Juarez │ 190000 │ 2 │ +4. │ Michael Stanley │ 150000 │ 3 │ +5. │ Douglas Benson │ 150000 │ 3 │ +6. │ Scott Harrison │ 150000 │ 3 │ +7. │ James Henderson │ 140000 │ 4 │ + └─────────────────┴────────┴────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/first_value.md b/docs/en/sql-reference/window-functions/first_value.md new file mode 100644 index 00000000000..30c3b1f99dc --- /dev/null +++ b/docs/en/sql-reference/window-functions/first_value.md @@ -0,0 +1,79 @@ +--- +slug: /en/sql-reference/window-functions/first_value +sidebar_label: first_value +sidebar_position: 3 +--- + +# first_value + +Returns the first value evaluated within its ordered frame. By default, NULL arguments are skipped, however the `RESPECT NULLS` modifier can be used to override this behaviour. + +**Syntax** + +```sql +first_value (column_name) [[RESPECT NULLS] | [IGNORE NULLS]] + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +Alias: `any`. + +:::note +Using the optional modifier `RESPECT NULLS` after `first_value(column_name)` will ensure that `NULL` arguments are not skipped. +See [NULL processing](../aggregate-functions/index.md/#null-processing) for more information. +::: + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- The first value evaluated within its ordered frame. + +**Example** + +In this example the `first_value` function is used to find the highest paid footballer from a fictional dataset of salaries of Premier League football players. + +Query: + +```sql +DROP TABLE IF EXISTS salaries; +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 196000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + first_value(player) OVER (ORDER BY salary DESC) AS highest_paid_player +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─highest_paid_player─┐ +1. │ Gary Chen │ 196000 │ Gary Chen │ +2. │ Robert George │ 195000 │ Gary Chen │ +3. │ Charles Juarez │ 190000 │ Gary Chen │ +4. │ Scott Harrison │ 180000 │ Gary Chen │ +5. │ Douglas Benson │ 150000 │ Gary Chen │ +6. │ James Henderson │ 140000 │ Gary Chen │ +7. │ Michael Stanley │ 100000 │ Gary Chen │ + └─────────────────┴────────┴─────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 3a8afd10359..0c3e2ea1cb6 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -1,10 +1,11 @@ --- slug: /en/sql-reference/window-functions/ -sidebar_position: 62 sidebar_label: Window Functions -title: Window Functions +sidebar_position: 1 --- +# Window Functions + Windows functions let you perform calculations across a set of rows that are related to the current row. Some of the calculations that you can do are similar to those that can be done with an aggregate function, but a window function doesn't cause rows to be grouped into a single output - the individual rows are still returned. @@ -12,8 +13,8 @@ Some of the calculations that you can do are similar to those that can be done w ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported. -| Feature | Supported? | -|------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Feature | Supported? | +|--------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | ad hoc window specification (`count(*) over (partition by id order by time desc)`) | ✅ | | expressions involving window functions, e.g. `(count(*) over ()) / 2)` | ✅ | | `WINDOW` clause (`select ... from table window w as (partition by id)`) | ✅ | @@ -23,6 +24,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | `GROUPS` frame | ❌ | | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | | `rank()`, `dense_rank()`, `row_number()` | ✅ | +| `percent_rank()` | ✅ Efficiently computes the relative standing of a value within a partition in a dataset. This function effectively replaces the more verbose and computationally intensive manual SQL calculation expressed as `ifNull((rank() OVER(PARTITION BY x ORDER BY y) - 1) / nullif(count(1) OVER(PARTITION BY x) - 1, 0), 0)`| | `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between preceding and preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | | ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | @@ -74,14 +76,14 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column] These functions can be used only as a window function. -- `row_number()` - Number the current row within its partition starting from 1. -- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. -- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. -- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. -- `rank()` - Rank the current row within its partition with gaps. -- `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. -- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +- [`row_number()`](./row_number.md) - Number the current row within its partition starting from 1. +- [`first_value(x)`](./first_value.md) - Return the first value evaluated within its ordered frame. +- [`last_value(x)`](./last_value.md) - Return the last value evaluated within its ordered frame. +- [`nth_value(x, offset)`](./nth_value.md) - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. +- [`rank()`](./rank.md) - Rank the current row within its partition with gaps. +- [`dense_rank()`](./dense_rank.md) - Rank the current row within its partition without gaps. +- [`lagInFrame(x)`](./lagInFrame.md) - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. +- [`leadInFrame(x)`](./leadInFrame.md) - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. ## Examples diff --git a/docs/en/sql-reference/window-functions/lagInFrame.md b/docs/en/sql-reference/window-functions/lagInFrame.md new file mode 100644 index 00000000000..049e095c10f --- /dev/null +++ b/docs/en/sql-reference/window-functions/lagInFrame.md @@ -0,0 +1,79 @@ +--- +slug: /en/sql-reference/window-functions/lagInFrame +sidebar_label: lagInFrame +sidebar_position: 8 +--- + +# lagInFrame + +Returns a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. + +**Syntax** + +```sql +lagInFrame(x[, offset[, default]]) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Parameters** +- `x` — Column name. +- `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default). +- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - `null` by default). + +**Returned value** + +- Value evaluated at the row that is at a specified physical offset before the current row within the ordered frame. + +**Example** + +This example looks at historical data for a specific stock and uses the `lagInFrame` function to calculate a day-to-day delta and percentage change in the closing price of the stock. + +Query: + +```sql +CREATE TABLE stock_prices +( + `date` Date, + `open` Float32, -- opening price + `high` Float32, -- daily high + `low` Float32, -- daily low + `close` Float32, -- closing price + `volume` UInt32 -- trade volume +) +Engine = Memory; + +INSERT INTO stock_prices FORMAT Values + ('2024-06-03', 113.62, 115.00, 112.00, 115.00, 438392000), + ('2024-06-04', 115.72, 116.60, 114.04, 116.44, 403324000), + ('2024-06-05', 118.37, 122.45, 117.47, 122.44, 528402000), + ('2024-06-06', 124.05, 125.59, 118.32, 121.00, 664696000), + ('2024-06-07', 119.77, 121.69, 118.02, 120.89, 412386000); +``` + +```sql +SELECT + date, + close, + lagInFrame(close, 1, close) OVER (ORDER BY date ASC) AS previous_day_close, + COALESCE(ROUND(close - previous_day_close, 2)) AS delta, + COALESCE(ROUND((delta / previous_day_close) * 100, 2)) AS percent_change +FROM stock_prices +ORDER BY date DESC; +``` + +Result: + +```response + ┌───────date─┬──close─┬─previous_day_close─┬─delta─┬─percent_change─┐ +1. │ 2024-06-07 │ 120.89 │ 121 │ -0.11 │ -0.09 │ +2. │ 2024-06-06 │ 121 │ 122.44 │ -1.44 │ -1.18 │ +3. │ 2024-06-05 │ 122.44 │ 116.44 │ 6 │ 5.15 │ +4. │ 2024-06-04 │ 116.44 │ 115 │ 1.44 │ 1.25 │ +5. │ 2024-06-03 │ 115 │ 115 │ 0 │ 0 │ + └────────────┴────────┴────────────────────┴───────┴────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/last_value.md b/docs/en/sql-reference/window-functions/last_value.md new file mode 100644 index 00000000000..dd7f5fa078a --- /dev/null +++ b/docs/en/sql-reference/window-functions/last_value.md @@ -0,0 +1,79 @@ +--- +slug: /en/sql-reference/window-functions/last_value +sidebar_label: last_value +sidebar_position: 4 +--- + +# last_value + +Returns the last value evaluated within its ordered frame. By default, NULL arguments are skipped, however the `RESPECT NULLS` modifier can be used to override this behaviour. + +**Syntax** + +```sql +last_value (column_name) [[RESPECT NULLS] | [IGNORE NULLS]] + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +Alias: `anyLast`. + +:::note +Using the optional modifier `RESPECT NULLS` after `first_value(column_name)` will ensure that `NULL` arguments are not skipped. +See [NULL processing](../aggregate-functions/index.md/#null-processing) for more information. +::: + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- The last value evaluated within its ordered frame. + +**Example** + +In this example the `last_value` function is used to find the highest paid footballer from a fictional dataset of salaries of Premier League football players. + +Query: + +```sql +DROP TABLE IF EXISTS salaries; +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 196000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + last_value(player) OVER (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS lowest_paid_player +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─lowest_paid_player─┐ +1. │ Gary Chen │ 196000 │ Michael Stanley │ +2. │ Robert George │ 195000 │ Michael Stanley │ +3. │ Charles Juarez │ 190000 │ Michael Stanley │ +4. │ Scott Harrison │ 180000 │ Michael Stanley │ +5. │ Douglas Benson │ 150000 │ Michael Stanley │ +6. │ James Henderson │ 140000 │ Michael Stanley │ +7. │ Michael Stanley │ 100000 │ Michael Stanley │ + └─────────────────┴────────┴────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/leadInFrame.md b/docs/en/sql-reference/window-functions/leadInFrame.md new file mode 100644 index 00000000000..fc1b92cc266 --- /dev/null +++ b/docs/en/sql-reference/window-functions/leadInFrame.md @@ -0,0 +1,60 @@ +--- +slug: /en/sql-reference/window-functions/leadInFrame +sidebar_label: leadInFrame +sidebar_position: 9 +--- + +# leadInFrame + +Returns a value evaluated at the row that is offset rows after the current row within the ordered frame. + +**Syntax** + +```sql +leadInFrame(x[, offset[, default]]) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Parameters** +- `x` — Column name. +- `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default). +- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - `null` by default). + +**Returned value** + +- value evaluated at the row that is offset rows after the current row within the ordered frame. + +**Example** + +This example looks at [historical data](https://www.kaggle.com/datasets/sazidthe1/nobel-prize-data) for Nobel Prize winners and uses the `leadInFrame` function to return a list of successive winners in the physics category. + +Query: + +```sql +CREATE OR REPLACE VIEW nobel_prize_laureates AS FROM file('nobel_laureates_data.csv') SELECT *; +``` + +```sql +FROM nobel_prize_laureates SELECT fullName, leadInFrame(year, 1, year) OVER (PARTITION BY category ORDER BY year) AS year, category, motivation WHERE category == 'physics' ORDER BY year DESC LIMIT 9; +``` + +Result: + +```response + ┌─fullName─────────┬─year─┬─category─┬─motivation─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +1. │ Pierre Agostini │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +2. │ Ferenc Krausz │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +3. │ Anne L Huillier │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +4. │ Alain Aspect │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ +5. │ Anton Zeilinger │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ +6. │ John Clauser │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ +7. │ Syukuro Manabe │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │ +8. │ Klaus Hasselmann │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │ +9. │ Giorgio Parisi │ 2021 │ physics │ for the discovery of the interplay of disorder and fluctuations in physical systems from atomic to planetary scales │ + └──────────────────┴──────┴──────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/nth_value.md b/docs/en/sql-reference/window-functions/nth_value.md new file mode 100644 index 00000000000..aa5baf651a8 --- /dev/null +++ b/docs/en/sql-reference/window-functions/nth_value.md @@ -0,0 +1,75 @@ +--- +slug: /en/sql-reference/window-functions/nth_value +sidebar_label: nth_value +sidebar_position: 5 +--- + +# nth_value + +Returns the first non-NULL value evaluated against the nth row (offset) in its ordered frame. + +**Syntax** + +```sql +nth_value (x, offset) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Parameters** + +- `x` — Column name. +- `offset` — nth row to evaluate current row against. + +**Returned value** + +- The first non-NULL value evaluated against the nth row (offset) in its ordered frame. + +**Example** + +In this example the `nth-value` function is used to find the third-highest salary from a fictional dataset of salaries of Premier League football players. + +Query: + +```sql +DROP TABLE IF EXISTS salaries; +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, nth_value(player,3) OVER(ORDER BY salary DESC) AS third_highest_salary FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─third_highest_salary─┐ +1. │ Gary Chen │ 195000 │ │ +2. │ Robert George │ 195000 │ │ +3. │ Charles Juarez │ 190000 │ Charles Juarez │ +4. │ Scott Harrison │ 180000 │ Charles Juarez │ +5. │ Douglas Benson │ 150000 │ Charles Juarez │ +6. │ James Henderson │ 140000 │ Charles Juarez │ +7. │ Michael Stanley │ 100000 │ Charles Juarez │ + └─────────────────┴────────┴──────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/rank.md b/docs/en/sql-reference/window-functions/rank.md new file mode 100644 index 00000000000..dff5e154151 --- /dev/null +++ b/docs/en/sql-reference/window-functions/rank.md @@ -0,0 +1,74 @@ +--- +slug: /en/sql-reference/window-functions/rank +sidebar_label: rank +sidebar_position: 6 +--- + +# rank + +Ranks the current row within its partition with gaps. In other words, if the value of any row it encounters is equal to the value of a previous row then it will receive the same rank as that previous row. +The rank of the next row is then equal to the rank of the previous row plus a gap equal to the number of times the previous rank was given. + +The [dense_rank](./dense_rank.md) function provides the same behaviour but without gaps in ranking. + +**Syntax** + +```sql +rank (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- A number for the current row within its partition, including gaps. [UInt64](../data-types/int-uint.md). + +**Example** + +The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA). + +Query: + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + rank() OVER (ORDER BY salary DESC) AS rank +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─rank─┐ +1. │ Gary Chen │ 195000 │ 1 │ +2. │ Robert George │ 195000 │ 1 │ +3. │ Charles Juarez │ 190000 │ 3 │ +4. │ Douglas Benson │ 150000 │ 4 │ +5. │ Michael Stanley │ 150000 │ 4 │ +6. │ Scott Harrison │ 150000 │ 4 │ +7. │ James Henderson │ 140000 │ 7 │ + └─────────────────┴────────┴──────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/row_number.md b/docs/en/sql-reference/window-functions/row_number.md new file mode 100644 index 00000000000..f1c331f89a3 --- /dev/null +++ b/docs/en/sql-reference/window-functions/row_number.md @@ -0,0 +1,67 @@ +--- +slug: /en/sql-reference/window-functions/row_number +sidebar_label: row_number +sidebar_position: 2 +--- + +# row_number + +Numbers the current row within its partition starting from 1. + +**Syntax** + +```sql +row_number (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- A number for the current row within its partition. [UInt64](../data-types/int-uint.md). + +**Example** + +The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA). + +Query: + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'); +``` + +```sql +SELECT player, salary, + row_number() OVER (ORDER BY salary DESC) AS row_number +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─row_number─┐ +1. │ Gary Chen │ 195000 │ 1 │ +2. │ Robert George │ 195000 │ 2 │ +3. │ Charles Juarez │ 190000 │ 3 │ +4. │ Scott Harrison │ 150000 │ 4 │ +5. │ Michael Stanley │ 150000 │ 5 │ + └─────────────────┴────────┴────────────┘ +``` \ No newline at end of file diff --git a/docs/ru/introduction/distinctive-features.md b/docs/ru/introduction/distinctive-features.md index dafaf055980..da820c90a1e 100644 --- a/docs/ru/introduction/distinctive-features.md +++ b/docs/ru/introduction/distinctive-features.md @@ -12,7 +12,7 @@ sidebar_label: "Отличительные возможности ClickHouse" Этот пункт пришлось выделить, так как существуют системы, которые могут хранить значения отдельных столбцов по отдельности, но не могут эффективно выполнять аналитические запросы в силу оптимизации под другой сценарий работы. Примеры: HBase, BigTable, Cassandra, HyperTable. В этих системах вы получите пропускную способность в районе сотен тысяч строк в секунду, но не сотен миллионов строк в секунду. -Также стоит заметить, что ClickHouse является системой управления базами данных, а не системой для одной базой данных. То есть, ClickHouse позволяет создавать таблицы и базы данных во время выполнения (runtime), загружать данные и выполнять запросы без переконфигурирования и перезапуска сервера. +Также стоит заметить, что ClickHouse является системой управления базами данных, а не системой для одной базы данных. То есть, ClickHouse позволяет создавать таблицы и базы данных во время выполнения (runtime), загружать данные и выполнять запросы без переконфигурирования и перезапуска сервера. ## Сжатие данных {#szhatie-dannykh} diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 3a70a0bac12..84bbf6c83d3 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2077,7 +2077,7 @@ SELECT * FROM test_table - 0 — оптимизация отключена. - 1 — оптимизация включена. -Значение по умолчанию: `0`. +Значение по умолчанию: `1`. ## optimize_trivial_count_query {#optimize-trivial-count-query} diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 4640882f2be..6b3a0b16624 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -66,18 +66,18 @@ else() message(STATUS "Library bridge mode: OFF") endif() -if (ENABLE_CLICKHOUSE_KEEPER) - message(STATUS "ClickHouse keeper mode: ON") -else() - message(STATUS "ClickHouse keeper mode: OFF") -endif() - if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) message(STATUS "ClickHouse keeper-converter mode: ON") else() message(STATUS "ClickHouse keeper-converter mode: OFF") endif() +if (ENABLE_CLICKHOUSE_KEEPER) + message(STATUS "ClickHouse Keeper: ON") +else() + message(STATUS "ClickHouse Keeper: OFF") +endif() + if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) message(STATUS "ClickHouse keeper-client mode: ON") else() @@ -131,10 +131,6 @@ add_subdirectory (static-files-disk-uploader) add_subdirectory (su) add_subdirectory (disks) -if (ENABLE_CLICKHOUSE_KEEPER) - add_subdirectory (keeper) -endif() - if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) add_subdirectory (keeper-converter) endif() @@ -143,6 +139,10 @@ if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) add_subdirectory (keeper-client) endif() +if (ENABLE_CLICKHOUSE_KEEPER) + add_subdirectory (keeper) +endif() + if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) add_subdirectory (odbc-bridge) endif () diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 6343dc85d00..e4b40d98819 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -24,9 +24,8 @@ #include #include #include - +#include #include -#include #include #include @@ -49,6 +48,8 @@ #include #include +#include + namespace fs = std::filesystem; using namespace std::literals; @@ -1117,6 +1118,7 @@ void Client::processOptions(const OptionsDescription & options_description, if (!options["user"].defaulted()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User and JWT flags can't be specified together"); config().setString("jwt", options["jwt"].as()); + config().setString("user", ""); } if (options.count("accept-invalid-certificate")) { diff --git a/programs/client/Client.h b/programs/client/Client.h index 229608f787d..6d57a6ea648 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -9,7 +9,10 @@ namespace DB class Client : public ClientBase { public: - Client() = default; + Client() + { + fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr); + } void initialize(Poco::Util::Application & self) override; diff --git a/programs/disks/CMakeLists.txt b/programs/disks/CMakeLists.txt index f0949fcfceb..7e8afe084fb 100644 --- a/programs/disks/CMakeLists.txt +++ b/programs/disks/CMakeLists.txt @@ -1,6 +1,8 @@ set (CLICKHOUSE_DISKS_SOURCES DisksApp.cpp + DisksClient.cpp ICommand.cpp + CommandChangeDirectory.cpp CommandCopy.cpp CommandLink.cpp CommandList.cpp @@ -9,10 +11,14 @@ set (CLICKHOUSE_DISKS_SOURCES CommandMove.cpp CommandRead.cpp CommandRemove.cpp - CommandWrite.cpp) + CommandSwitchDisk.cpp + CommandWrite.cpp + CommandHelp.cpp + CommandTouch.cpp + CommandGetCurrentDiskAndPath.cpp) if (CLICKHOUSE_CLOUD) - set (CLICKHOUSE_DISKS_SOURCES ${CLICKHOUSE_DISKS_SOURCES} CommandPackedIO.cpp) + set (CLICKHOUSE_DISKS_SOURCES ${CLICKHOUSE_DISKS_SOURCES} CommandPackedIO.cpp) endif () set (CLICKHOUSE_DISKS_LINK diff --git a/programs/disks/CommandChangeDirectory.cpp b/programs/disks/CommandChangeDirectory.cpp new file mode 100644 index 00000000000..b545f37de72 --- /dev/null +++ b/programs/disks/CommandChangeDirectory.cpp @@ -0,0 +1,35 @@ +#include +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandChangeDirectory final : public ICommand +{ +public: + explicit CommandChangeDirectory() : ICommand() + { + command_name = "cd"; + description = "Change directory (makes sense only in interactive mode)"; + options_description.add_options()("path", po::value(), "the path to which we want to change (mandatory, positional)")( + "disk", po::value(), "A disk where the path is changed (without disk switching)"); + positional_options_description.add("path", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + DiskWithPath & disk = getDiskWithPath(client, options, "disk"); + String path = getValueFromCommandLineOptionsThrow(options, "path"); + disk.setPath(path); + } +}; + +CommandPtr makeCommandChangeDirectory() +{ + return std::make_shared(); +} + +} diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index f176fa277d7..e3051f2702c 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -1,6 +1,8 @@ -#include "ICommand.h" #include +#include "Common/Exception.h" #include +#include "DisksClient.h" +#include "ICommand.h" namespace DB { @@ -10,59 +12,89 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } + class CommandCopy final : public ICommand { public: - CommandCopy() + explicit CommandCopy() : ICommand() { command_name = "copy"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Recursively copy data from `FROM_PATH` to `TO_PATH`"; - usage = "copy [OPTION]... "; - command_option_description->add_options() - ("disk-from", po::value(), "disk from which we copy") - ("disk-to", po::value(), "disk to which we copy"); + description = "Recursively copy data from `path-from` to `path-to`"; + options_description.add_options()( + "disk-from", po::value(), "disk from which we copy is executed (default value is a current disk)")( + "disk-to", po::value(), "disk to which copy is executed (default value is a current disk)")( + "path-from", po::value(), "path from which copy is executed (mandatory, positional)")( + "path-to", po::value(), "path to which copy is executed (mandatory, positional)")( + "recursive,r", "recursively copy the directory (required to remove a directory)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("disk-from")) - config.setString("disk-from", options["disk-from"].as()); - if (options.count("disk-to")) - config.setString("disk-to", options["disk-to"].as()); - } + auto disk_from = getDiskWithPath(client, options, "disk-from"); + auto disk_to = getDiskWithPath(client, options, "disk-to"); + String path_from = disk_from.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + String path_to = disk_to.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + bool recursive = options.count("recursive"); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 2) + if (!disk_from.getDisk()->exists(path_from)) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "cannot stat '{}' on disk '{}': No such file or directory", + path_from, + disk_from.getDisk()->getName()); } + else if (disk_from.getDisk()->isFile(path_from)) + { + auto target_location = getTargetLocation(path_from, disk_to, path_to); + if (!disk_to.getDisk()->exists(target_location) || disk_to.getDisk()->isFile(target_location)) + { + disk_from.getDisk()->copyFile( + path_from, + *disk_to.getDisk(), + target_location, + /* read_settings= */ {}, + /* write_settings= */ {}, + /* cancellation_hook= */ {}); + } + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "cannot overwrite directory {} with non-directory {}", target_location, path_from); + } + } + else if (disk_from.getDisk()->isDirectory(path_from)) + { + if (!recursive) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "--recursive not specified; omitting directory {}", path_from); + } + auto target_location = getTargetLocation(path_from, disk_to, path_to); - String disk_name_from = config.getString("disk-from", config.getString("disk", "default")); - String disk_name_to = config.getString("disk-to", config.getString("disk", "default")); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk_from = disk_selector->get(disk_name_from); - DiskPtr disk_to = disk_selector->get(disk_name_to); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to, /* read_settings= */ {}, /* write_settings= */ {}, /* cancellation_hook= */ {}); + if (disk_to.getDisk()->isFile(target_location)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory {} with directory {}", path_to, target_location); + } + else if (!disk_to.getDisk()->exists(target_location)) + { + disk_to.getDisk()->createDirectory(target_location); + } + disk_from.getDisk()->copyDirectoryContent( + path_from, + disk_to.getDisk(), + target_location, + /* read_settings= */ {}, + /* write_settings= */ {}, + /* cancellation_hook= */ {}); + } } }; + +CommandPtr makeCommandCopy() +{ + return std::make_shared(); } -std::unique_ptr makeCommandCopy() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandGetCurrentDiskAndPath.cpp b/programs/disks/CommandGetCurrentDiskAndPath.cpp new file mode 100644 index 00000000000..15f8ef5aae8 --- /dev/null +++ b/programs/disks/CommandGetCurrentDiskAndPath.cpp @@ -0,0 +1,30 @@ +#include +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandGetCurrentDiskAndPath final : public ICommand +{ +public: + explicit CommandGetCurrentDiskAndPath() : ICommand() + { + command_name = "current_disk_with_path"; + description = "Prints current disk and path (which coincide with the prompt)"; + } + + void executeImpl(const CommandLineOptions &, DisksClient & client) override + { + auto disk = client.getCurrentDiskWithPath(); + std::cout << "Disk: " << disk.getDisk()->getName() << "\nPath: " << disk.getCurrentPath() << std::endl; + } +}; + +CommandPtr makeCommandGetCurrentDiskAndPath() +{ + return std::make_shared(); +} +} diff --git a/programs/disks/CommandHelp.cpp b/programs/disks/CommandHelp.cpp new file mode 100644 index 00000000000..a3aee9498d3 --- /dev/null +++ b/programs/disks/CommandHelp.cpp @@ -0,0 +1,43 @@ +#include "DisksApp.h" +#include "ICommand.h" + +#include +#include + +namespace DB +{ + +class CommandHelp final : public ICommand +{ +public: + explicit CommandHelp(const DisksApp & disks_app_) : disks_app(disks_app_) + { + command_name = "help"; + description = "Print help message about available commands"; + options_description.add_options()( + "command", po::value(), "A command to help with (optional, positional), if not specified, help lists all the commands"); + positional_options_description.add("command", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & /*client*/) override + { + std::optional command = getValueFromCommandLineOptionsWithOptional(options, "command"); + if (command.has_value()) + { + disks_app.printCommandHelpMessage(command.value()); + } + else + { + disks_app.printAvailableCommandsHelpMessage(); + } + } + + const DisksApp & disks_app; +}; + +CommandPtr makeCommandHelp(const DisksApp & disks_app) +{ + return std::make_shared(disks_app); +} + +} diff --git a/programs/disks/CommandLink.cpp b/programs/disks/CommandLink.cpp index dbaa3162f82..11c196cafc5 100644 --- a/programs/disks/CommandLink.cpp +++ b/programs/disks/CommandLink.cpp @@ -1,14 +1,9 @@ -#include "ICommand.h" #include +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandLink final : public ICommand { public: @@ -16,42 +11,27 @@ public: { command_name = "link"; description = "Create hardlink from `from_path` to `to_path`"; - usage = "link [OPTION]... "; + options_description.add_options()( + "path-from", po::value(), "the path from which a hard link will be created (mandatory, positional)")( + "path-to", po::value(), "the path where a hard link will be created (mandatory, positional)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - } + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 2) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } + const String & path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + const String & path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); - String disk_name = config.getString("disk", "default"); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - disk->createHardLink(relative_path_from, relative_path_to); + disk.getDisk()->createHardLink(path_from, path_to); } }; + +CommandPtr makeCommandLink() +{ + return std::make_shared(); } -std::unique_ptr makeCommandLink() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index 7213802ea86..77479b1d217 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -1,98 +1,95 @@ -#include "ICommand.h" #include #include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandList final : public ICommand { public: - CommandList() + explicit CommandList() : ICommand() { command_name = "list"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); description = "List files at path[s]"; - usage = "list [OPTION]... ..."; - command_option_description->add_options() - ("recursive", "recursively list all directories"); + options_description.add_options()("recursive", "recursively list the directory")("all", "show hidden files")( + "path", po::value(), "the path of listing (mandatory, positional)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("recursive")) - config.setBool("recursive", true); - } - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - bool recursive = config.getBool("recursive", false); + bool recursive = options.count("recursive"); + bool show_hidden = options.count("all"); + auto disk = client.getCurrentDiskWithPath(); + String path = getValueFromCommandLineOptionsWithDefault(options, "path", "."); if (recursive) - listRecursive(disk, relative_path); + listRecursive(disk, path, show_hidden); else - list(disk, relative_path); + list(disk, path, show_hidden); } private: - static void list(const DiskPtr & disk, const std::string & relative_path) + static void list(const DiskWithPath & disk, const std::string & path, bool show_hidden) { - std::vector file_names; - disk->listFiles(relative_path, file_names); + std::vector file_names = disk.listAllFilesByPath(path); + std::vector selected_and_sorted_file_names{}; for (const auto & file_name : file_names) - std::cout << file_name << '\n'; + if (show_hidden || (!file_name.starts_with('.'))) + selected_and_sorted_file_names.push_back(file_name); + + std::sort(selected_and_sorted_file_names.begin(), selected_and_sorted_file_names.end()); + for (const auto & file_name : selected_and_sorted_file_names) + { + std::cout << file_name << "\n"; + } } - static void listRecursive(const DiskPtr & disk, const std::string & relative_path) + static void listRecursive(const DiskWithPath & disk, const std::string & relative_path, bool show_hidden) { - std::vector file_names; - disk->listFiles(relative_path, file_names); + std::vector file_names = disk.listAllFilesByPath(relative_path); + std::vector selected_and_sorted_file_names{}; std::cout << relative_path << ":\n"; - if (!file_names.empty()) - { - for (const auto & file_name : file_names) - std::cout << file_name << '\n'; - std::cout << "\n"; - } - for (const auto & file_name : file_names) + if (show_hidden || (!file_name.starts_with('.'))) + selected_and_sorted_file_names.push_back(file_name); + + std::sort(selected_and_sorted_file_names.begin(), selected_and_sorted_file_names.end()); + for (const auto & file_name : selected_and_sorted_file_names) { - auto path = relative_path.empty() ? file_name : (relative_path + "/" + file_name); - if (disk->isDirectory(path)) - listRecursive(disk, path); + std::cout << file_name << "\n"; + } + std::cout << "\n"; + + for (const auto & file_name : selected_and_sorted_file_names) + { + auto path = [&]() -> String + { + if (relative_path.ends_with("/")) + { + return relative_path + file_name; + } + else + { + return relative_path + "/" + file_name; + } + }(); + if (disk.isDirectory(path)) + { + listRecursive(disk, path, show_hidden); + } } } }; -} -std::unique_ptr makeCommandList() +CommandPtr makeCommandList() { - return std::make_unique(); + return std::make_shared(); +} } diff --git a/programs/disks/CommandListDisks.cpp b/programs/disks/CommandListDisks.cpp index 79da021fd00..9fb67fed5e0 100644 --- a/programs/disks/CommandListDisks.cpp +++ b/programs/disks/CommandListDisks.cpp @@ -1,68 +1,40 @@ -#include "ICommand.h" +#include #include +#include +#include "DisksClient.h" +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandListDisks final : public ICommand { public: - CommandListDisks() + explicit CommandListDisks() : ICommand() { command_name = "list-disks"; - description = "List disks names"; - usage = "list-disks [OPTION]"; + description = "Lists all available disks"; } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr &, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions &, DisksClient & client) override { - if (!command_arguments.empty()) + std::vector sorted_and_selected{}; + for (const auto & disk_name : client.getAllDiskNames()) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + sorted_and_selected.push_back(disk_name + ":" + client.getDiskWithPath(disk_name).getAbsolutePath("")); } - - constexpr auto config_prefix = "storage_configuration.disks"; - constexpr auto default_disk_name = "default"; - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_prefix, keys); - - bool has_default_disk = false; - - /// For the output to be ordered - std::set disks; - - for (const auto & disk_name : keys) + std::sort(sorted_and_selected.begin(), sorted_and_selected.end()); + for (const auto & disk_name : sorted_and_selected) { - if (disk_name == default_disk_name) - has_default_disk = true; - disks.insert(disk_name); + std::cout << disk_name << "\n"; } - - if (!has_default_disk) - disks.insert(default_disk_name); - - for (const auto & disk : disks) - std::cout << disk << '\n'; } -}; -} -std::unique_ptr makeCommandListDisks() +private: +}; + +CommandPtr makeCommandListDisks() { - return std::make_unique(); + return std::make_shared(); +} } diff --git a/programs/disks/CommandMkDir.cpp b/programs/disks/CommandMkDir.cpp index 6d33bdec498..c6222f326d4 100644 --- a/programs/disks/CommandMkDir.cpp +++ b/programs/disks/CommandMkDir.cpp @@ -6,61 +6,35 @@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandMkDir final : public ICommand { public: CommandMkDir() { command_name = "mkdir"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Create a directory"; - usage = "mkdir [OPTION]... "; - command_option_description->add_options() - ("recursive", "recursively create directories"); + description = "Creates a directory"; + options_description.add_options()("parents", "recursively create directories")( + "path", po::value(), "the path on which directory should be created (mandatory, positional)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("recursive")) - config.setBool("recursive", true); - } + bool recursive = options.count("parents"); + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - bool recursive = config.getBool("recursive", false); + String path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path")); if (recursive) - disk->createDirectories(relative_path); + disk.getDisk()->createDirectories(path); else - disk->createDirectory(relative_path); + disk.getDisk()->createDirectory(path); } }; + +CommandPtr makeCommandMkDir() +{ + return std::make_shared(); } -std::unique_ptr makeCommandMkDir() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 75cf96252ed..e3d485032e0 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -1,5 +1,5 @@ -#include "ICommand.h" #include +#include "ICommand.h" namespace DB { @@ -9,6 +9,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } + class CommandMove final : public ICommand { public: @@ -16,44 +17,62 @@ public: { command_name = "move"; description = "Move file or directory from `from_path` to `to_path`"; - usage = "move [OPTION]... "; + options_description.add_options()("path-from", po::value(), "path from which we copy (mandatory, positional)")( + "path-to", po::value(), "path to which we copy (mandatory, positional)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (command_arguments.size() != 2) + auto disk = client.getCurrentDiskWithPath(); + + String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + + if (disk.getDisk()->isFile(path_from)) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + disk.getDisk()->moveFile(path_from, path_to); + } + else if (disk.getDisk()->isDirectory(path_from)) + { + auto target_location = getTargetLocation(path_from, disk, path_to); + if (!disk.getDisk()->exists(target_location)) + { + disk.getDisk()->createDirectory(target_location); + disk.getDisk()->moveDirectory(path_from, target_location); + } + else + { + if (disk.getDisk()->isFile(target_location)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory '{}' with directory '{}'", target_location, path_from); + } + if (!disk.getDisk()->isDirectoryEmpty(target_location)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot move '{}' to '{}': Directory not empty", path_from, target_location); + } + else + { + disk.getDisk()->moveDirectory(path_from, target_location); + } + } + } + else if (!disk.getDisk()->exists(path_from)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "cannot stat '{}' on disk: '{}': No such file or directory", + path_from, + disk.getDisk()->getName()); } - - String disk_name = config.getString("disk", "default"); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - if (disk->isFile(relative_path_from)) - disk->moveFile(relative_path_from, relative_path_to); - else - disk->moveDirectory(relative_path_from, relative_path_to); } }; + +CommandPtr makeCommandMove() +{ + return std::make_shared(); } -std::unique_ptr makeCommandMove() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index 0f3ac7ab98c..277e735f507 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -1,78 +1,52 @@ -#include "ICommand.h" -#include #include #include #include +#include #include +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandRead final : public ICommand { public: CommandRead() { command_name = "read"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Read a file from `FROM_PATH` to `TO_PATH`"; - usage = "read [OPTION]... []"; - command_option_description->add_options() - ("output", po::value(), "file to which we are reading, defaults to `stdout`"); + description = "Read a file from `path-from` to `path-to`"; + options_description.add_options()("path-from", po::value(), "file from which we are reading (mandatory, positional)")( + "path-to", po::value(), "file to which we are writing, defaults to `stdout`"); + positional_options_description.add("path-from", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("output")) - config.setString("output", options["output"].as()); - } + auto disk = client.getCurrentDiskWithPath(); + String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + std::optional path_to = getValueFromCommandLineOptionsWithOptional(options, "path-to"); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) + auto in = disk.getDisk()->readFile(path_from); + std::unique_ptr out = {}; + if (path_to.has_value()) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(command_arguments[0]); - - String path_output = config.getString("output", ""); - - if (!path_output.empty()) - { - String relative_path_output = validatePathAndGetAsRelative(path_output); - - auto in = disk->readFile(relative_path); - auto out = disk->writeFile(relative_path_output); + String relative_path_to = disk.getRelativeFromRoot(path_to.value()); + out = disk.getDisk()->writeFile(relative_path_to); copyData(*in, *out); - out->finalize(); } else { - auto in = disk->readFile(relative_path); - std::unique_ptr out = std::make_unique(STDOUT_FILENO); + out = std::make_unique(STDOUT_FILENO); copyData(*in, *out); + out->write('\n'); } + out->finalize(); } }; + +CommandPtr makeCommandRead() +{ + return std::make_shared(); } -std::unique_ptr makeCommandRead() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index 0c631eacff3..4388f6c0b14 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -1,5 +1,6 @@ -#include "ICommand.h" #include +#include "Common/Exception.h" +#include "ICommand.h" namespace DB { @@ -9,46 +10,49 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } + class CommandRemove final : public ICommand { public: CommandRemove() { command_name = "remove"; - description = "Remove file or directory with all children. Throws exception if file doesn't exists.\nPath should be in format './' or './path' or 'path'"; - usage = "remove [OPTION]... "; + description = "Remove file or directory. Throws exception if file doesn't exists"; + options_description.add_options()("path", po::value(), "path that is going to be deleted (mandatory, positional)")( + "recursive,r", "recursively removes the directory (required to remove a directory)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (command_arguments.size() != 1) + auto disk = client.getCurrentDiskWithPath(); + const String & path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path")); + bool recursive = options.count("recursive"); + if (!disk.getDisk()->exists(path)) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} on disk {} doesn't exist", path, disk.getDisk()->getName()); + } + else if (disk.getDisk()->isDirectory(path)) + { + if (!recursive) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot remove '{}': Is a directory", path); + } + else + { + disk.getDisk()->removeRecursive(path); + } + } + else + { + disk.getDisk()->removeFileIfExists(path); } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - disk->removeRecursive(relative_path); } }; + +CommandPtr makeCommandRemove() +{ + return std::make_shared(); } -std::unique_ptr makeCommandRemove() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandSwitchDisk.cpp b/programs/disks/CommandSwitchDisk.cpp new file mode 100644 index 00000000000..fa02d991365 --- /dev/null +++ b/programs/disks/CommandSwitchDisk.cpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include "DisksApp.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandSwitchDisk final : public ICommand +{ +public: + explicit CommandSwitchDisk() : ICommand() + { + command_name = "switch-disk"; + description = "Switch disk (makes sense only in interactive mode)"; + options_description.add_options()("disk", po::value(), "the disk to switch to (mandatory, positional)")( + "path", po::value(), "the path to switch on the disk"); + positional_options_description.add("disk", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + String disk = getValueFromCommandLineOptions(options, "disk"); + std::optional path = getValueFromCommandLineOptionsWithOptional(options, "path"); + + client.switchToDisk(disk, path); + } +}; + +CommandPtr makeCommandSwitchDisk() +{ + return std::make_shared(); +} +} diff --git a/programs/disks/CommandTouch.cpp b/programs/disks/CommandTouch.cpp new file mode 100644 index 00000000000..c0bdb64cf9e --- /dev/null +++ b/programs/disks/CommandTouch.cpp @@ -0,0 +1,34 @@ +#include +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandTouch final : public ICommand +{ +public: + explicit CommandTouch() : ICommand() + { + command_name = "touch"; + description = "Create a file by path"; + options_description.add_options()("path", po::value(), "the path of listing (mandatory, positional)"); + positional_options_description.add("path", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + auto disk = client.getCurrentDiskWithPath(); + String path = getValueFromCommandLineOptionsThrow(options, "path"); + + disk.getDisk()->createFile(disk.getRelativeFromRoot(path)); + } +}; + +CommandPtr makeCommandTouch() +{ + return std::make_shared(); +} +} diff --git a/programs/disks/CommandWrite.cpp b/programs/disks/CommandWrite.cpp index 7ded37e067a..9c82132e284 100644 --- a/programs/disks/CommandWrite.cpp +++ b/programs/disks/CommandWrite.cpp @@ -1,79 +1,57 @@ -#include "ICommand.h" #include +#include "ICommand.h" -#include #include #include #include +#include namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandWrite final : public ICommand { public: CommandWrite() { command_name = "write"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Write a file from `FROM_PATH` to `TO_PATH`"; - usage = "write [OPTION]... [] "; - command_option_description->add_options() - ("input", po::value(), "file from which we are reading, defaults to `stdin`"); + description = "Write a file from `path-from` to `path-to`"; + options_description.add_options()("path-from", po::value(), "file from which we are reading, defaults to `stdin` (input from `stdin` is finished by Ctrl+D)")( + "path-to", po::value(), "file to which we are writing (mandatory, positional)"); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("input")) - config.setString("input", options["input"].as()); - } + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) + std::optional path_from = getValueFromCommandLineOptionsWithOptional(options, "path-from"); + + String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + + auto in = [&]() -> std::unique_ptr { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } + if (!path_from.has_value()) + { + return std::make_unique(STDIN_FILENO); + } + else + { + String relative_path_from = disk.getRelativeFromRoot(path_from.value()); + return disk.getDisk()->readFile(relative_path_from); + } + }(); - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - String path_input = config.getString("input", ""); - std::unique_ptr in; - if (path_input.empty()) - { - in = std::make_unique(STDIN_FILENO); - } - else - { - String relative_path_input = validatePathAndGetAsRelative(path_input); - in = disk->readFile(relative_path_input); - } - - auto out = disk->writeFile(relative_path); + auto out = disk.getDisk()->writeFile(path_to); copyData(*in, *out); out->finalize(); } }; + +CommandPtr makeCommandWrite() +{ + return std::make_shared(); } -std::unique_ptr makeCommandWrite() -{ - return std::make_unique(); } diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 5da5ab4bae9..59ba45b9451 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -1,11 +1,22 @@ #include "DisksApp.h" +#include +#include +#include "Common/Exception.h" +#include "Common/filesystemHelpers.h" +#include +#include "DisksClient.h" #include "ICommand.h" +#include "ICommand_fwd.h" + +#include +#include +#include +#include #include -#include #include - +#include namespace DB { @@ -13,74 +24,289 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +}; + +LineReader::Patterns DisksApp::query_extenders = {"\\"}; +LineReader::Patterns DisksApp::query_delimiters = {""}; +String DisksApp::word_break_characters = " \t\v\f\a\b\r\n"; + +CommandPtr DisksApp::getCommandByName(const String & command) const +{ + try + { + if (auto it = aliases.find(command); it != aliases.end()) + return command_descriptions.at(it->second); + + return command_descriptions.at(command); + } + catch (std::out_of_range &) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The command `{}` is unknown", command); + } } -size_t DisksApp::findCommandPos(std::vector & common_arguments) +std::vector DisksApp::getEmptyCompletion(String command_name) const { - for (size_t i = 0; i < common_arguments.size(); i++) - if (supported_commands.contains(common_arguments[i])) - return i + 1; - return common_arguments.size(); + auto command_ptr = command_descriptions.at(command_name); + std::vector answer{}; + if (multidisk_commands.contains(command_ptr->command_name)) + { + answer = client->getAllFilesByPatternFromAllDisks(""); + } + else + { + answer = client->getCurrentDiskWithPath().getAllFilesByPattern(""); + } + for (const auto & disk_name : client->getAllDiskNames()) + { + answer.push_back(disk_name); + } + for (const auto & option : command_ptr->options_description.options()) + { + answer.push_back("--" + option->long_name()); + } + if (command_name == "help") + { + for (const auto & [current_command_name, description] : command_descriptions) + { + answer.push_back(current_command_name); + } + } + std::sort(answer.begin(), answer.end()); + return answer; } -void DisksApp::printHelpMessage(ProgramOptionsDescription & command_option_description) +std::vector DisksApp::getCommandsToComplete(const String & command_prefix) const { - std::optional help_description = - createOptionsDescription("Help Message for clickhouse-disks", getTerminalWidth()); - - help_description->add(command_option_description); - - std::cout << "ClickHouse disk management tool\n"; - std::cout << "Usage: ./clickhouse-disks [OPTION]\n"; - std::cout << "clickhouse-disks\n\n"; - - for (const auto & current_command : supported_commands) - std::cout << command_descriptions[current_command]->command_name - << "\t" - << command_descriptions[current_command]->description - << "\n\n"; - - std::cout << command_option_description << '\n'; + std::vector answer{}; + for (const auto & [word, _] : command_descriptions) + { + if (word.starts_with(command_prefix)) + { + answer.push_back(word); + } + } + if (!answer.empty()) + { + std::sort(answer.begin(), answer.end()); + return answer; + } + for (const auto & [word, _] : aliases) + { + if (word.starts_with(command_prefix)) + { + answer.push_back(word); + } + } + if (!answer.empty()) + { + std::sort(answer.begin(), answer.end()); + return answer; + } + return {command_prefix}; } -String DisksApp::getDefaultConfigFileName() +std::vector DisksApp::getCompletions(const String & prefix) const { - return "/etc/clickhouse-server/config.xml"; + auto arguments = po::split_unix(prefix, word_break_characters); + if (arguments.empty()) + { + return {}; + } + if (word_break_characters.contains(prefix.back())) + { + CommandPtr command; + try + { + command = getCommandByName(arguments[0]); + } + catch (...) + { + return {arguments.back()}; + } + return getEmptyCompletion(command->command_name); + } + else if (arguments.size() == 1) + { + String command_prefix = arguments[0]; + return getCommandsToComplete(command_prefix); + } + else + { + String last_token = arguments.back(); + CommandPtr command; + try + { + command = getCommandByName(arguments[0]); + } + catch (...) + { + return {last_token}; + } + std::vector answer = {}; + if (command->command_name == "help") + { + return getCommandsToComplete(last_token); + } + else + { + answer = [&]() -> std::vector + { + if (multidisk_commands.contains(command->command_name)) + { + return client->getAllFilesByPatternFromAllDisks(last_token); + } + else + { + return client->getCurrentDiskWithPath().getAllFilesByPattern(last_token); + } + }(); + + for (const auto & disk_name : client->getAllDiskNames()) + { + if (disk_name.starts_with(last_token)) + { + answer.push_back(disk_name); + } + } + for (const auto & option : command->options_description.options()) + { + String option_sign = "--" + option->long_name(); + if (option_sign.starts_with(last_token)) + { + answer.push_back(option_sign); + } + } + } + if (!answer.empty()) + { + std::sort(answer.begin(), answer.end()); + return answer; + } + else + { + return {last_token}; + } + } } -void DisksApp::addOptions( - ProgramOptionsDescription & options_description_, - boost::program_options::positional_options_description & positional_options_description -) +bool DisksApp::processQueryText(const String & text) { - options_description_.add_options() - ("help,h", "Print common help message") - ("config-file,C", po::value(), "Set config file") - ("disk", po::value(), "Set disk name") - ("command_name", po::value(), "Name for command to do") - ("save-logs", "Save logs to a file") - ("log-level", po::value(), "Logging level") - ; + if (text.find_first_not_of(word_break_characters) == std::string::npos) + { + return true; + } + if (exit_strings.find(text) != exit_strings.end()) + return false; + CommandPtr command; + try + { + auto arguments = po::split_unix(text, word_break_characters); + command = getCommandByName(arguments[0]); + arguments.erase(arguments.begin()); + command->execute(arguments, *client); + } + catch (DB::Exception & err) + { + int code = getCurrentExceptionCode(); + if (code == ErrorCodes::LOGICAL_ERROR) + { + throw std::move(err); + } + else if (code == ErrorCodes::BAD_ARGUMENTS) + { + std::cerr << err.message() << "\n" + << "\n"; + if (command.get()) + { + std::cerr << "COMMAND: " << command->command_name << "\n"; + std::cerr << command->options_description << "\n"; + } + else + { + printAvailableCommandsHelpMessage(); + } + } + else + { + std::cerr << err.message() << "\n"; + } + } + catch (std::exception & err) + { + std::cerr << err.what() << "\n"; + } - positional_options_description.add("command_name", 1); + return true; +} - supported_commands = {"list-disks", "list", "move", "remove", "link", "copy", "write", "read", "mkdir"}; -#ifdef CLICKHOUSE_CLOUD - supported_commands.insert("packed-io"); -#endif +void DisksApp::runInteractiveReplxx() +{ + ReplxxLineReader lr( + suggest, + history_file, + /* multiline= */ false, + query_extenders, + query_delimiters, + word_break_characters.c_str(), + /* highlighter_= */ {}); + lr.enableBracketedPaste(); + + while (true) + { + DiskWithPath disk_with_path = client->getCurrentDiskWithPath(); + String prompt = "\x1b[1;34m" + disk_with_path.getDisk()->getName() + "\x1b[0m:" + "\x1b[1;31m" + disk_with_path.getCurrentPath() + + "\x1b[0m$ "; + + auto input = lr.readLine(prompt, "\x1b[1;31m:-] \x1b[0m"); + if (input.empty()) + break; + + if (!processQueryText(input)) + break; + } +} + +void DisksApp::parseAndCheckOptions( + const std::vector & arguments, const ProgramOptionsDescription & options_description, CommandLineOptions & options) +{ + auto parser = po::command_line_parser(arguments).options(options_description).allow_unregistered(); + po::parsed_options parsed = parser.run(); + po::store(parsed, options); +} + +void DisksApp::addOptions() +{ + options_description.add_options()("help,h", "Print common help message")("config-file,C", po::value(), "Set config file")( + "disk", po::value(), "Set disk name")("save-logs", "Save logs to a file")( + "log-level", po::value(), "Logging level")("query,q", po::value(), "Query for a non-interactive mode")( + "test-mode", "Interactive interface in test regyme"); command_descriptions.emplace("list-disks", makeCommandListDisks()); + command_descriptions.emplace("copy", makeCommandCopy()); command_descriptions.emplace("list", makeCommandList()); + command_descriptions.emplace("cd", makeCommandChangeDirectory()); command_descriptions.emplace("move", makeCommandMove()); command_descriptions.emplace("remove", makeCommandRemove()); command_descriptions.emplace("link", makeCommandLink()); - command_descriptions.emplace("copy", makeCommandCopy()); command_descriptions.emplace("write", makeCommandWrite()); command_descriptions.emplace("read", makeCommandRead()); command_descriptions.emplace("mkdir", makeCommandMkDir()); + command_descriptions.emplace("switch-disk", makeCommandSwitchDisk()); + command_descriptions.emplace("current_disk_with_path", makeCommandGetCurrentDiskAndPath()); + command_descriptions.emplace("touch", makeCommandTouch()); + command_descriptions.emplace("help", makeCommandHelp(*this)); #ifdef CLICKHOUSE_CLOUD command_descriptions.emplace("packed-io", makeCommandPackedIO()); #endif + for (const auto & [command_name, command_ptr] : command_descriptions) + { + if (command_name != command_ptr->command_name) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Command name inside map doesn't coincide with actual command name"); + } + } } void DisksApp::processOptions() @@ -93,76 +319,122 @@ void DisksApp::processOptions() config().setBool("save-logs", true); if (options.count("log-level")) config().setString("log-level", options["log-level"].as()); + if (options.count("test-mode")) + config().setBool("test-mode", true); + if (options.count("query")) + query = std::optional{options["query"].as()}; } -DisksApp::~DisksApp() + +void DisksApp::printEntryHelpMessage() const { - if (global_context) - global_context->shutdown(); + std::cout << "\x1b[1;33m ClickHouse disk management tool \x1b[0m \n"; + std::cout << options_description << '\n'; } -void DisksApp::init(std::vector & common_arguments) + +void DisksApp::printAvailableCommandsHelpMessage() const { - stopOptionsProcessing(); + std::cout << "\x1b[1;32mAvailable commands:\x1b[0m\n"; + std::vector> commands_with_aliases_and_descrtiptions{}; + size_t maximal_command_length = 0; + for (const auto & [command_name, command_ptr] : command_descriptions) + { + std::string command_string = getCommandLineWithAliases(command_ptr); + maximal_command_length = std::max(maximal_command_length, command_string.size()); + commands_with_aliases_and_descrtiptions.push_back({std::move(command_string), command_descriptions.at(command_name)}); + } + for (const auto & [command_with_aliases, command_ptr] : commands_with_aliases_and_descrtiptions) + { + std::cout << "\x1b[1;33m" << command_with_aliases << "\x1b[0m" << std::string(5, ' ') << "\x1b[1;33m" << command_ptr->description + << "\x1b[0m \n"; + std::cout << command_ptr->options_description; + std::cout << std::endl; + } +} - ProgramOptionsDescription options_description{createOptionsDescription("clickhouse-disks", getTerminalWidth())}; +void DisksApp::printCommandHelpMessage(CommandPtr command) const +{ + String command_name_with_aliases = getCommandLineWithAliases(command); + std::cout << "\x1b[1;32m" << command_name_with_aliases << "\x1b[0m" << std::string(2, ' ') << command->description << "\n"; + std::cout << command->options_description; +} - po::positional_options_description positional_options_description; +void DisksApp::printCommandHelpMessage(String command_name) const +{ + printCommandHelpMessage(getCommandByName(command_name)); +} - addOptions(options_description, positional_options_description); +String DisksApp::getCommandLineWithAliases(CommandPtr command) const +{ + String command_string = command->command_name; + bool need_comma = false; + for (const auto & [alias_name, alias_command_name] : aliases) + { + if (alias_command_name == command->command_name) + { + if (std::exchange(need_comma, true)) + command_string += ","; + else + command_string += "("; + command_string += alias_name; + } + } + command_string += (need_comma ? ")" : ""); + return command_string; +} - size_t command_pos = findCommandPos(common_arguments); - std::vector global_flags(command_pos); - command_arguments.resize(common_arguments.size() - command_pos); - copy(common_arguments.begin(), common_arguments.begin() + command_pos, global_flags.begin()); - copy(common_arguments.begin() + command_pos, common_arguments.end(), command_arguments.begin()); +void DisksApp::initializeHistoryFile() +{ + String home_path; + const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) + if (home_path_cstr) + home_path = home_path_cstr; + if (config().has("history-file")) + history_file = config().getString("history-file"); + else + history_file = home_path + "/.disks-file-history"; - parseAndCheckOptions(options_description, positional_options_description, global_flags); + if (!history_file.empty() && !fs::exists(history_file)) + { + try + { + FS::createFile(history_file); + } + catch (const ErrnoException & e) + { + if (e.getErrno() != EEXIST) + throw; + } + } +} + +void DisksApp::init(const std::vector & common_arguments) +{ + addOptions(); + parseAndCheckOptions(common_arguments, options_description, options); po::notify(options); if (options.count("help")) { - printHelpMessage(options_description); + printEntryHelpMessage(); + printAvailableCommandsHelpMessage(); exit(0); // NOLINT(concurrency-mt-unsafe) } - if (!supported_commands.contains(command_name)) - { - std::cerr << "Unknown command name: " << command_name << "\n"; - printHelpMessage(options_description); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - processOptions(); } -void DisksApp::parseAndCheckOptions( - ProgramOptionsDescription & options_description_, - boost::program_options::positional_options_description & positional_options_description, - std::vector & arguments) +String DisksApp::getDefaultConfigFileName() { - auto parser = po::command_line_parser(arguments) - .options(options_description_) - .positional(positional_options_description) - .allow_unregistered(); - - po::parsed_options parsed = parser.run(); - po::store(parsed, options); - - auto positional_arguments = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - for (const auto & arg : positional_arguments) - { - if (command_descriptions.contains(arg)) - { - command_name = arg; - break; - } - } + return "/etc/clickhouse-server/config.xml"; } int DisksApp::main(const std::vector & /*args*/) { + std::vector keys; + config().keys(keys); if (config().has("config-file") || fs::exists(getDefaultConfigFileName())) { String config_path = config().getString("config-file", getDefaultConfigFileName()); @@ -173,9 +445,13 @@ int DisksApp::main(const std::vector & /*args*/) } else { + printEntryHelpMessage(); throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specified"); } + config().keys(keys); + initializeHistoryFile(); + if (config().has("save-logs")) { auto log_level = config().getString("log-level", "trace"); @@ -200,61 +476,68 @@ int DisksApp::main(const std::vector & /*args*/) global_context->setApplicationType(Context::ApplicationType::DISKS); String path = config().getString("path", DBMS_DEFAULT_PATH); + global_context->setPath(path); - auto & command = command_descriptions[command_name]; + String main_disk = config().getString("disk", "default"); - auto command_options = command->getCommandOptions(); - std::vector args; - if (command_options) + auto validator = [](const Poco::Util::AbstractConfiguration &, const std::string &, const std::string &) { return true; }; + + constexpr auto config_prefix = "storage_configuration.disks"; + auto disk_selector = std::make_shared(std::unordered_set{"cache", "encrypted"}); + disk_selector->initialize(config(), config_prefix, global_context, validator); + + std::vector>> disks_with_path; + + for (const auto & [_, disk_ptr] : disk_selector->getDisksMap()) { - auto parser = po::command_line_parser(command_arguments).options(*command_options).allow_unregistered(); - po::parsed_options parsed = parser.run(); - po::store(parsed, options); - po::notify(options); + disks_with_path.emplace_back( + disk_ptr, (disk_ptr->getName() == "local") ? std::optional{fs::current_path().string()} : std::nullopt); + } - args = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - command->processOptions(config(), options); + + client = std::make_unique(std::move(disks_with_path), main_disk); + + suggest.setCompletionsCallback([&](const String & prefix, size_t /* prefix_length */) { return getCompletions(prefix); }); + + if (!query.has_value()) + { + runInteractive(); } else { - auto parser = po::command_line_parser(command_arguments).options({}).allow_unregistered(); - po::parsed_options parsed = parser.run(); - args = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); + processQueryText(query.value()); } - std::unordered_set disks - { - config().getString("disk", "default"), - config().getString("disk-from", config().getString("disk", "default")), - config().getString("disk-to", config().getString("disk", "default")), - }; - - auto validator = [&disks]( - const Poco::Util::AbstractConfiguration & config, - const std::string & disk_config_prefix, - const std::string & disk_name) - { - if (!disks.contains(disk_name)) - return false; - - const auto disk_type = config.getString(disk_config_prefix + ".type", "local"); - - if (disk_type == "cache") - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk type 'cache' of disk {} is not supported by clickhouse-disks", disk_name); - - return true; - }; - - constexpr auto config_prefix = "storage_configuration.disks"; - auto disk_selector = std::make_shared(); - disk_selector->initialize(config(), config_prefix, global_context, validator); - - command->execute(args, disk_selector, config()); - return Application::EXIT_OK; } +DisksApp::~DisksApp() +{ + client.reset(nullptr); + if (global_context) + global_context->shutdown(); +} + +void DisksApp::runInteractiveTestMode() +{ + for (String input; std::getline(std::cin, input);) + { + if (!processQueryText(input)) + break; + + std::cout << "\a\a\a\a" << std::endl; + std::cerr << std::flush; + } +} + +void DisksApp::runInteractive() +{ + if (config().hasOption("test-mode")) + runInteractiveTestMode(); + else + runInteractiveReplxx(); +} } int mainEntryClickHouseDisks(int argc, char ** argv) @@ -269,16 +552,16 @@ int mainEntryClickHouseDisks(int argc, char ** argv) catch (const DB::Exception & e) { std::cerr << DB::getExceptionMessage(e, false) << std::endl; - return 1; + return 0; } catch (const boost::program_options::error & e) { std::cerr << "Bad arguments: " << e.what() << std::endl; - return DB::ErrorCodes::BAD_ARGUMENTS; + return 0; } catch (...) { std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; - return 1; + return 0; } } diff --git a/programs/disks/DisksApp.h b/programs/disks/DisksApp.h index 51bc3f58dc4..f8167884c62 100644 --- a/programs/disks/DisksApp.h +++ b/programs/disks/DisksApp.h @@ -1,61 +1,107 @@ #pragma once +#include +#include +#include #include +#include "DisksClient.h" +#include "ICommand_fwd.h" #include +#include +#include #include -#include - namespace DB { -class ICommand; -using CommandPtr = std::unique_ptr; - -namespace po = boost::program_options; using ProgramOptionsDescription = boost::program_options::options_description; using CommandLineOptions = boost::program_options::variables_map; -class DisksApp : public Poco::Util::Application, public Loggers +class DisksApp : public Poco::Util::Application { public: - DisksApp() = default; - ~DisksApp() override; + void addOptions(); - void init(std::vector & common_arguments); - - int main(const std::vector & args) override; - -protected: - static String getDefaultConfigFileName(); - - void addOptions( - ProgramOptionsDescription & options_description, - boost::program_options::positional_options_description & positional_options_description); void processOptions(); - void printHelpMessage(ProgramOptionsDescription & command_option_description); + bool processQueryText(const String & text); - size_t findCommandPos(std::vector & common_arguments); + void init(const std::vector & common_arguments); + + int main(const std::vector & /*args*/) override; + + CommandPtr getCommandByName(const String & command) const; + + void initializeHistoryFile(); + + static void parseAndCheckOptions( + const std::vector & arguments, const ProgramOptionsDescription & options_description, CommandLineOptions & options); + + void printEntryHelpMessage() const; + void printAvailableCommandsHelpMessage() const; + void printCommandHelpMessage(String command_name) const; + void printCommandHelpMessage(CommandPtr command) const; + String getCommandLineWithAliases(CommandPtr command) const; + + + std::vector getCompletions(const String & prefix) const; + + std::vector getEmptyCompletion(String command_name) const; + + ~DisksApp() override; private: - void parseAndCheckOptions( - ProgramOptionsDescription & options_description, - boost::program_options::positional_options_description & positional_options_description, - std::vector & arguments); + void runInteractive(); + void runInteractiveReplxx(); + void runInteractiveTestMode(); + + String getDefaultConfigFileName(); + + std::vector getCommandsToComplete(const String & command_prefix) const; + + // Fields responsible for the REPL work + String history_file; + LineReader::Suggest suggest; + static LineReader::Patterns query_extenders; + static LineReader::Patterns query_delimiters; + static String word_break_characters; + + // General command line arguments parsing fields -protected: - ContextMutablePtr global_context; SharedContextHolder shared_context; - - String command_name; - std::vector command_arguments; - - std::unordered_set supported_commands; + ContextMutablePtr global_context; + ProgramOptionsDescription options_description; + CommandLineOptions options; std::unordered_map command_descriptions; - po::variables_map options; -}; + std::optional query; + const std::unordered_map aliases + = {{"cp", "copy"}, + {"mv", "move"}, + {"ls", "list"}, + {"list_disks", "list-disks"}, + {"ln", "link"}, + {"rm", "remove"}, + {"cat", "read"}, + {"r", "read"}, + {"w", "write"}, + {"create", "touch"}, + {"delete", "remove"}, + {"ls-disks", "list-disks"}, + {"ls_disks", "list-disks"}, + {"packed_io", "packed-io"}, + {"change-dir", "cd"}, + {"change_dir", "cd"}, + {"switch_disk", "switch-disk"}, + {"current", "current_disk_with_path"}, + {"current_disk", "current_disk_with_path"}, + {"current_path", "current_disk_with_path"}, + {"cur", "current_disk_with_path"}}; + + std::set multidisk_commands = {"copy", "packed-io", "switch-disk", "cd"}; + + std::unique_ptr client{}; +}; } diff --git a/programs/disks/DisksClient.cpp b/programs/disks/DisksClient.cpp new file mode 100644 index 00000000000..7e36c7911ab --- /dev/null +++ b/programs/disks/DisksClient.cpp @@ -0,0 +1,263 @@ +#include "DisksClient.h" +#include +#include +#include +#include + +#include + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +}; + +namespace DB +{ +DiskWithPath::DiskWithPath(DiskPtr disk_, std::optional path_) : disk(disk_) +{ + if (path_.has_value()) + { + if (!fs::path{path_.value()}.is_absolute()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} is not absolute", path_.value()); + } + path = path_.value(); + } + else + { + path = String{"/"}; + } + + String relative_path = normalizePathAndGetAsRelative(path); + if (disk->isDirectory(relative_path) || (relative_path.empty() && (disk->isDirectory("/")))) + { + return; + } + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Initializing path {} (normalized path: {}) at disk {} is not a directory", + path, + relative_path, + disk->getName()); +} + +std::vector DiskWithPath::listAllFilesByPath(const String & any_path) const +{ + if (isDirectory(any_path)) + { + std::vector file_names; + disk->listFiles(getRelativeFromRoot(any_path), file_names); + return file_names; + } + else + { + return {}; + } +} + +std::vector DiskWithPath::getAllFilesByPattern(const String & pattern) const +{ + auto [path_before, path_after] = [&]() -> std::pair + { + auto slash_pos = pattern.find_last_of('/'); + if (slash_pos >= pattern.size()) + { + return {"", pattern}; + } + else + { + return {pattern.substr(0, slash_pos + 1), pattern.substr(slash_pos + 1, pattern.size() - slash_pos - 1)}; + } + }(); + + if (!isDirectory(path_before)) + { + return {}; + } + else + { + std::vector file_names = listAllFilesByPath(path_before); + + std::vector answer; + + for (const auto & file_name : file_names) + { + if (file_name.starts_with(path_after)) + { + String file_pattern = path_before + file_name; + if (isDirectory(file_pattern)) + { + file_pattern = file_pattern + "/"; + } + answer.push_back(file_pattern); + } + } + return answer; + } +}; + +void DiskWithPath::setPath(const String & any_path) +{ + if (isDirectory(any_path)) + { + path = getAbsolutePath(any_path); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} at disk {} is not a directory", any_path, disk->getName()); + } +} + +String DiskWithPath::validatePathAndGetAsRelative(const String & path) +{ + String lexically_normal_path = fs::path(path).lexically_normal(); + if (lexically_normal_path.find("..") != std::string::npos) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path); + + /// If path is absolute we should keep it as relative inside disk, so disk will look like + /// an ordinary filesystem with root. + if (fs::path(lexically_normal_path).is_absolute()) + return lexically_normal_path.substr(1); + + return lexically_normal_path; +} + +String DiskWithPath::normalizePathAndGetAsRelative(const String & messyPath) +{ + std::filesystem::path path(messyPath); + std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); + String npath = canonical_path.make_preferred().string(); + return validatePathAndGetAsRelative(npath); +} + +String DiskWithPath::normalizePath(const String & path) +{ + std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); + return canonical_path.make_preferred().string(); +} + +DisksClient::DisksClient(std::vector>> && disks_with_paths, std::optional begin_disk) +{ + if (disks_with_paths.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing array of disks is empty"); + } + if (!begin_disk.has_value()) + { + begin_disk = disks_with_paths[0].first->getName(); + } + bool has_begin_disk = false; + for (auto & [disk, path] : disks_with_paths) + { + addDisk(disk, path); + if (disk->getName() == begin_disk.value()) + { + has_begin_disk = true; + } + } + if (!has_begin_disk) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no begin_disk '{}' in initializing array", begin_disk.value()); + } + current_disk = std::move(begin_disk.value()); +} + +const DiskWithPath & DisksClient::getDiskWithPath(const String & disk) const +{ + try + { + return disks.at(disk); + } + catch (...) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk); + } +} + +DiskWithPath & DisksClient::getDiskWithPath(const String & disk) +{ + try + { + return disks.at(disk); + } + catch (...) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk); + } +} + +const DiskWithPath & DisksClient::getCurrentDiskWithPath() const +{ + try + { + return disks.at(current_disk); + } + catch (...) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client"); + } +} + +DiskWithPath & DisksClient::getCurrentDiskWithPath() +{ + try + { + return disks.at(current_disk); + } + catch (...) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client"); + } +} + +void DisksClient::switchToDisk(const String & disk_, const std::optional & path_) +{ + if (disks.contains(disk_)) + { + if (path_.has_value()) + { + disks.at(disk_).setPath(path_.value()); + } + current_disk = disk_; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk_); + } +} + +std::vector DisksClient::getAllDiskNames() const +{ + std::vector answer{}; + answer.reserve(disks.size()); + for (const auto & [disk_name, _] : disks) + { + answer.push_back(disk_name); + } + return answer; +} + +std::vector DisksClient::getAllFilesByPatternFromAllDisks(const String & pattern) const +{ + std::vector answer{}; + for (const auto & [_, disk] : disks) + { + for (auto & word : disk.getAllFilesByPattern(pattern)) + { + answer.push_back(word); + } + } + return answer; +} + +void DisksClient::addDisk(DiskPtr disk_, const std::optional & path_) +{ + String disk_name = disk_->getName(); + if (disks.contains(disk_->getName())) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' already exists", disk_name); + } + disks.emplace(disk_name, DiskWithPath{disk_, path_}); +} +} diff --git a/programs/disks/DisksClient.h b/programs/disks/DisksClient.h new file mode 100644 index 00000000000..8a55d22af93 --- /dev/null +++ b/programs/disks/DisksClient.h @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "Disks/IDisk.h" + +#include +#include +#include + +namespace fs = std::filesystem; + +namespace DB +{ + +std::vector split(const String & text, const String & delimiters); + +using ProgramOptionsDescription = boost::program_options::options_description; +using CommandLineOptions = boost::program_options::variables_map; + +class DiskWithPath +{ +public: + explicit DiskWithPath(DiskPtr disk_, std::optional path_ = std::nullopt); + + String getAbsolutePath(const String & any_path) const { return normalizePath(fs::path(path) / any_path); } + + String getCurrentPath() const { return path; } + + bool isDirectory(const String & any_path) const + { + return disk->isDirectory(getRelativeFromRoot(any_path)) || (getRelativeFromRoot(any_path).empty() && (disk->isDirectory("/"))); + } + + std::vector listAllFilesByPath(const String & any_path) const; + + std::vector getAllFilesByPattern(const String & pattern) const; + + DiskPtr getDisk() const { return disk; } + + void setPath(const String & any_path); + + String getRelativeFromRoot(const String & any_path) const { return normalizePathAndGetAsRelative(getAbsolutePath(any_path)); } + +private: + static String validatePathAndGetAsRelative(const String & path); + static std::string normalizePathAndGetAsRelative(const std::string & messyPath); + static std::string normalizePath(const std::string & messyPath); + + const DiskPtr disk; + String path; +}; + +class DisksClient +{ +public: + explicit DisksClient(std::vector>> && disks_with_paths, std::optional begin_disk); + + const DiskWithPath & getDiskWithPath(const String & disk) const; + + DiskWithPath & getDiskWithPath(const String & disk); + + const DiskWithPath & getCurrentDiskWithPath() const; + + DiskWithPath & getCurrentDiskWithPath(); + + DiskPtr getCurrentDisk() const { return getCurrentDiskWithPath().getDisk(); } + + DiskPtr getDisk(const String & disk) const { return getDiskWithPath(disk).getDisk(); } + + void switchToDisk(const String & disk_, const std::optional & path_); + + std::vector getAllDiskNames() const; + + std::vector getAllFilesByPatternFromAllDisks(const String & pattern) const; + + +private: + void addDisk(DiskPtr disk_, const std::optional & path_); + + String current_disk; + std::unordered_map disks; +}; +} diff --git a/programs/disks/ICommand.cpp b/programs/disks/ICommand.cpp index 86188fb6db1..f622bcad3c6 100644 --- a/programs/disks/ICommand.cpp +++ b/programs/disks/ICommand.cpp @@ -1,5 +1,5 @@ #include "ICommand.h" -#include +#include "DisksClient.h" namespace DB @@ -10,43 +10,42 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -void ICommand::printHelpMessage() const +CommandLineOptions ICommand::processCommandLineArguments(const Strings & commands) { - std::cout << "Command: " << command_name << '\n'; - std::cout << "Description: " << description << '\n'; - std::cout << "Usage: " << usage << '\n'; + CommandLineOptions options; + auto parser = po::command_line_parser(commands); + parser.options(options_description).positional(positional_options_description); - if (command_option_description) + po::parsed_options parsed = parser.run(); + po::store(parsed, options); + + return options; +} + +void ICommand::execute(const Strings & commands, DisksClient & client) +{ + try { - auto options = *command_option_description; - if (!options.options().empty()) - std::cout << options << '\n'; + processCommandLineArguments(commands); + } + catch (std::exception & exc) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}", exc.what()); + } + executeImpl(processCommandLineArguments(commands), client); +} + +DiskWithPath & ICommand::getDiskWithPath(DisksClient & client, const CommandLineOptions & options, const String & name) +{ + auto disk_name = getValueFromCommandLineOptionsWithOptional(options, name); + if (disk_name.has_value()) + { + return client.getDiskWithPath(disk_name.value()); + } + else + { + return client.getCurrentDiskWithPath(); } } -void ICommand::addOptions(ProgramOptionsDescription & options_description) -{ - if (!command_option_description || command_option_description->options().empty()) - return; - - options_description.add(*command_option_description); -} - -String ICommand::validatePathAndGetAsRelative(const String & path) -{ - /// If path contain non-normalized symbols like . we will normalized them. If the resulting normalized path - /// still contain '..' it can be dangerous, disallow such paths. Also since clickhouse-disks - /// is not an interactive program (don't track you current path) it's OK to disallow .. paths. - String lexically_normal_path = fs::path(path).lexically_normal(); - if (lexically_normal_path.find("..") != std::string::npos) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path); - - /// If path is absolute we should keep it as relative inside disk, so disk will look like - /// an ordinary filesystem with root. - if (fs::path(lexically_normal_path).is_absolute()) - return lexically_normal_path.substr(1); - - return lexically_normal_path; -} - } diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index efe350fe87b..6faf90e2b52 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -1,66 +1,146 @@ #pragma once -#include +#include #include +#include +#include #include -#include #include +#include "Common/Exception.h" +#include -#include +#include + +#include "DisksApp.h" + +#include "DisksClient.h" + +#include "ICommand_fwd.h" namespace DB { namespace po = boost::program_options; -using ProgramOptionsDescription = boost::program_options::options_description; -using CommandLineOptions = boost::program_options::variables_map; +using ProgramOptionsDescription = po::options_description; +using PositionalProgramOptionsDescription = po::positional_options_description; +using CommandLineOptions = po::variables_map; + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} class ICommand { public: - ICommand() = default; + explicit ICommand() = default; virtual ~ICommand() = default; - virtual void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) = 0; + void execute(const Strings & commands, DisksClient & client); - const std::optional & getCommandOptions() const { return command_option_description; } + virtual void executeImpl(const CommandLineOptions & options, DisksClient & client) = 0; - void addOptions(ProgramOptionsDescription & options_description); - - virtual void processOptions(Poco::Util::LayeredConfiguration & config, po::variables_map & options) const = 0; + CommandLineOptions processCommandLineArguments(const Strings & commands); protected: - void printHelpMessage() const; + template + static T getValueFromCommandLineOptions(const CommandLineOptions & options, const String & name) + { + try + { + return options[name].as(); + } + catch (boost::bad_any_cast &) + { + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Argument '{}' has wrong type and can't be parsed", name); + } + } + + template + static T getValueFromCommandLineOptionsThrow(const CommandLineOptions & options, const String & name) + { + if (options.count(name)) + { + return getValueFromCommandLineOptions(options, name); + } + else + { + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Mandatory argument '{}' is missing", name); + } + } + + template + static T getValueFromCommandLineOptionsWithDefault(const CommandLineOptions & options, const String & name, const T & default_value) + { + if (options.count(name)) + { + return getValueFromCommandLineOptions(options, name); + } + else + { + return default_value; + } + } + + template + static std::optional getValueFromCommandLineOptionsWithOptional(const CommandLineOptions & options, const String & name) + { + if (options.count(name)) + { + return std::optional{getValueFromCommandLineOptions(options, name)}; + } + else + { + return std::nullopt; + } + } + + DiskWithPath & getDiskWithPath(DisksClient & client, const CommandLineOptions & options, const String & name); + + String getTargetLocation(const String & path_from, DiskWithPath & disk_to, const String & path_to) + { + if (!disk_to.getDisk()->isDirectory(path_to)) + { + return path_to; + } + String copied_path_from = path_from; + if (copied_path_from.ends_with('/')) + { + copied_path_from.pop_back(); + } + String plain_filename = fs::path(copied_path_from).filename(); + + return fs::path{path_to} / plain_filename; + } - static String validatePathAndGetAsRelative(const String & path); public: String command_name; String description; + ProgramOptionsDescription options_description; protected: - std::optional command_option_description; - String usage; - po::positional_options_description positional_options_description; + PositionalProgramOptionsDescription positional_options_description; }; -using CommandPtr = std::unique_ptr; - -} - DB::CommandPtr makeCommandCopy(); -DB::CommandPtr makeCommandLink(); -DB::CommandPtr makeCommandList(); DB::CommandPtr makeCommandListDisks(); +DB::CommandPtr makeCommandList(); +DB::CommandPtr makeCommandChangeDirectory(); +DB::CommandPtr makeCommandLink(); DB::CommandPtr makeCommandMove(); DB::CommandPtr makeCommandRead(); DB::CommandPtr makeCommandRemove(); DB::CommandPtr makeCommandWrite(); DB::CommandPtr makeCommandMkDir(); +DB::CommandPtr makeCommandSwitchDisk(); +DB::CommandPtr makeCommandGetCurrentDiskAndPath(); +DB::CommandPtr makeCommandHelp(const DisksApp & disks_app); +DB::CommandPtr makeCommandTouch(); +#ifdef CLICKHOUSE_CLOUD DB::CommandPtr makeCommandPackedIO(); +#endif +} diff --git a/programs/disks/ICommand_fwd.h b/programs/disks/ICommand_fwd.h new file mode 100644 index 00000000000..84310b4a18d --- /dev/null +++ b/programs/disks/ICommand_fwd.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ +class ICommand; + +using CommandPtr = std::shared_ptr; +} diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 079951be55e..9b931c49c24 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -1,4 +1,5 @@ set(CLICKHOUSE_KEEPER_SOURCES + keeper_main.cpp Keeper.cpp ) @@ -8,6 +9,9 @@ set (CLICKHOUSE_KEEPER_LINK clickhouse_common_io clickhouse_common_zookeeper daemon + clickhouse-keeper-converter-lib + clickhouse-keeper-client-lib + clickhouse_functions dbms ) @@ -17,199 +21,11 @@ install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-ke if (BUILD_STANDALONE_KEEPER) # Straight list of all required sources - set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperReconfiguration.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/RaftServerConfig.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/CoordinationSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/FourLetterCommand.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/InMemoryLogStore.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperConnectionStats.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperDispatcher.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperLogStore.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperFeatureFlags.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManager.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManagerS3.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateMachine.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateManager.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperConstants.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperAsynchronousMetrics.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperCommon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SessionExpiryQueue.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SummingStateMachine.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/WriteBufferFromNuraftBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ZooKeeperDataReader.cpp + clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_SOURCES}) - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsFields.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BaseSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/Field.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsEnums.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerUUID.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/UUID.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BackgroundSchedulePool.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/IO/ReadBuffer.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPPathHints.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperTCPHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/TCPServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/NotFoundHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ProtocolServerAdapter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/CertificateReloader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperReadinessHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/CloudPlacementInfo.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnection.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerRequest.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerResponse.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnectionFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CachedCompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CheckingCompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferBase.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecZSTD.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/CurrentThread.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollections.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollectionConfiguration.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/Jemalloc.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/IKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/TestKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperCommon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperConstants.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperImpl.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperIO.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperLock.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperNodeCache.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/registerDisks.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskSelector.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskLocal.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskLocalCheckThread.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/LocalDirectorySyncGuard.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/TemporaryFileOnDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/loadLocalDiskConfig.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskType.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/IObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataOperationsHolder.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIterator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/StoredObject.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/DiskS3Utils.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/IOUringReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getIOUringReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferFromTemporaryFile.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferWithFinalizeCallback.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousBoundedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getThreadPoolReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolRemoteFSReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolReader.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp - ${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/Context.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/Settings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/ThreadStatusExt.cpp - - Keeper.cpp - clickhouse-keeper.cpp - ) - - # List of resources for clickhouse-keeper client - if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) - list(APPEND CLICKHOUSE_KEEPER_STANDALONE_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/KeeperClient.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Commands.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Parser.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/LineReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/ReplxxLineReader.cpp - ) - endif() - - clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES}) - - # Remove some redundant dependencies - target_compile_definitions (clickhouse-keeper PRIVATE -DCLICKHOUSE_KEEPER_STANDALONE_BUILD) - target_compile_definitions (clickhouse-keeper PUBLIC -DWITHOUT_TEXT_LOG) - - if (ENABLE_CLICKHOUSE_KEEPER_CLIENT AND TARGET ch_rust::skim) - target_link_libraries(clickhouse-keeper PRIVATE ch_rust::skim) - endif() - - target_link_libraries(clickhouse-keeper - PRIVATE - ch_contrib::abseil_swiss_tables - ch_contrib::nuraft - ch_contrib::lz4 - ch_contrib::zstd - ch_contrib::cityhash - ch_contrib::jemalloc - common ch_contrib::double_conversion - ch_contrib::dragonbox_to_chars - pcg_random - ch_contrib::pdqsort - ch_contrib::miniselect - clickhouse_common_config_no_zookeeper_log - loggers_no_text_log - clickhouse_common_io - clickhouse_parsers # Otherwise compression will not built. FIXME. - ) + target_link_libraries(clickhouse-keeper PUBLIC ${CLICKHOUSE_KEEPER_LINK}) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) - if (SPLIT_DEBUG_SYMBOLS) clickhouse_split_debug_symbols(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-keeper) else() diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index f14ef2e5552..44c2daa33ad 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -27,6 +27,8 @@ #include #include +#include + #include #include @@ -75,16 +77,6 @@ int mainEntryClickHouseKeeper(int argc, char ** argv) } } -#ifdef CLICKHOUSE_KEEPER_STANDALONE_BUILD - -// Weak symbols don't work correctly on Darwin -// so we have a stub implementation to avoid linker errors -void collectCrashLog( - Int32, UInt64, const String &, const StackTrace &) -{} - -#endif - namespace DB { @@ -272,11 +264,43 @@ HTTPContextPtr httpContext() return std::make_shared(Context::getGlobalContextInstance()); } +String getKeeperPath(Poco::Util::LayeredConfiguration & config) +{ + String path; + if (config.has("keeper_server.storage_path")) + { + path = config.getString("keeper_server.storage_path"); + } + else if (config.has("keeper_server.log_storage_path")) + { + path = std::filesystem::path(config.getString("keeper_server.log_storage_path")).parent_path(); + } + else if (config.has("keeper_server.snapshot_storage_path")) + { + path = std::filesystem::path(config.getString("keeper_server.snapshot_storage_path")).parent_path(); + } + else if (std::filesystem::is_directory(std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination")) + { + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "By default 'keeper_server.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper_server.storage_path' in the keeper configuration explicitly", + KEEPER_DEFAULT_PATH, String{std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination"}); + } + else + { + path = KEEPER_DEFAULT_PATH; + } + return path; +} + + } int Keeper::main(const std::vector & /*args*/) try { +#if USE_JEMALLOC + setJemallocBackgroundThreads(true); +#endif Poco::Logger * log = &logger(); UseSSL use_ssl; @@ -321,31 +345,7 @@ try updateMemorySoftLimitInConfig(config()); - std::string path; - - if (config().has("keeper_server.storage_path")) - { - path = config().getString("keeper_server.storage_path"); - } - else if (config().has("keeper_server.log_storage_path")) - { - path = std::filesystem::path(config().getString("keeper_server.log_storage_path")).parent_path(); - } - else if (config().has("keeper_server.snapshot_storage_path")) - { - path = std::filesystem::path(config().getString("keeper_server.snapshot_storage_path")).parent_path(); - } - else if (std::filesystem::is_directory(std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination")) - { - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, - "By default 'keeper_server.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper_server.storage_path' in the keeper configuration explicitly", - KEEPER_DEFAULT_PATH, String{std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"}); - } - else - { - path = KEEPER_DEFAULT_PATH; - } - + std::string path = getKeeperPath(config()); std::filesystem::create_directories(path); /// Check that the process user id matches the owner of the data. @@ -559,7 +559,7 @@ try auto main_config_reloader = std::make_unique( config_path, extra_paths, - config().getString("path", KEEPER_DEFAULT_PATH), + getKeeperPath(config()), std::move(unused_cache), unused_event, [&](ConfigurationPtr config, bool /* initial_loading */) diff --git a/programs/keeper/clickhouse-keeper.cpp b/programs/keeper/clickhouse-keeper.cpp deleted file mode 100644 index f2f91930ac0..00000000000 --- a/programs/keeper/clickhouse-keeper.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include "config_tools.h" - - -int mainEntryClickHouseKeeper(int argc, char ** argv); - -#if ENABLE_CLICKHOUSE_KEEPER_CLIENT -int mainEntryClickHouseKeeperClient(int argc, char ** argv); -#endif - -int main(int argc_, char ** argv_) -{ -#if ENABLE_CLICKHOUSE_KEEPER_CLIENT - - if (argc_ >= 2) - { - /// 'clickhouse-keeper --client ...' and 'clickhouse-keeper client ...' are OK - if (strcmp(argv_[1], "--client") == 0 || strcmp(argv_[1], "client") == 0) - { - argv_[1] = argv_[0]; - return mainEntryClickHouseKeeperClient(--argc_, argv_ + 1); - } - } - - if (argc_ > 0 && (strcmp(argv_[0], "clickhouse-keeper-client") == 0 || endsWith(argv_[0], "/clickhouse-keeper-client"))) - return mainEntryClickHouseKeeperClient(argc_, argv_); -#endif - - return mainEntryClickHouseKeeper(argc_, argv_); -} diff --git a/programs/keeper/conf.d/local.yaml b/programs/keeper/conf.d/local.yaml new file mode 100644 index 00000000000..722e90e374a --- /dev/null +++ b/programs/keeper/conf.d/local.yaml @@ -0,0 +1,9 @@ +logger: + log: + "@remove": remove + errorlog: + "@remove": remove + console: 1 +keeper_server: + log_storage_path: ./logs + snapshot_storage_path: ./snapshots diff --git a/programs/keeper/keeper_main.cpp b/programs/keeper/keeper_main.cpp new file mode 100644 index 00000000000..a240f9699f2 --- /dev/null +++ b/programs/keeper/keeper_main.cpp @@ -0,0 +1,189 @@ +#include +#include + +#include +#include +#include +#include +#include /// pair + +#include + +#include "config.h" +#include "config_tools.h" + +#include +#include + +#include +#include +#include + +#include +#include + + +int mainEntryClickHouseKeeper(int argc, char ** argv); +#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER +int mainEntryClickHouseKeeperConverter(int argc, char ** argv); +#endif +#if ENABLE_CLICKHOUSE_KEEPER_CLIENT +int mainEntryClickHouseKeeperClient(int argc, char ** argv); +#endif + +namespace +{ + +using MainFunc = int (*)(int, char**); + +/// Add an item here to register new application +std::pair clickhouse_applications[] = +{ + // keeper + {"keeper", mainEntryClickHouseKeeper}, +#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER + {"converter", mainEntryClickHouseKeeperConverter}, + {"keeper-converter", mainEntryClickHouseKeeperConverter}, +#endif +#if ENABLE_CLICKHOUSE_KEEPER_CLIENT + {"client", mainEntryClickHouseKeeperClient}, + {"keeper-client", mainEntryClickHouseKeeperClient}, +#endif + +}; + +int printHelp(int, char **) +{ + std::cerr << "Use one of the following commands:" << std::endl; + for (auto & application : clickhouse_applications) + std::cerr << "clickhouse " << application.first << " [args] " << std::endl; + return -1; +} + +} + + +bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) +{ + /// Use app if the first arg 'app' is passed (the arg should be quietly removed) + if (argv.size() >= 2) + { + auto first_arg = argv.begin() + 1; + + /// 'clickhouse --client ...' and 'clickhouse client ...' are Ok + if (*first_arg == app_suffix + || (std::string_view(*first_arg).starts_with("--") && std::string_view(*first_arg).substr(2) == app_suffix)) + { + argv.erase(first_arg); + return true; + } + } + + /// keeper suffix is default which will be used if no other app is detected + if (app_suffix == "keeper") + return false; + + /// Use app if clickhouse binary is run through symbolic link with name clickhouse-app + std::string app_name = "clickhouse-" + std::string(app_suffix); + return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name)); +} + +/// Don't allow dlopen in the main ClickHouse binary, because it is harmful and insecure. +/// We don't use it. But it can be used by some libraries for implementation of "plugins". +/// We absolutely discourage the ancient technique of loading +/// 3rd-party uncontrolled dangerous libraries into the process address space, +/// because it is insane. + +#if !defined(USE_MUSL) +extern "C" +{ + void * dlopen(const char *, int) + { + return nullptr; + } + + void * dlmopen(long, const char *, int) // NOLINT + { + return nullptr; + } + + int dlclose(void *) + { + return 0; + } + + const char * dlerror() + { + return "ClickHouse does not allow dynamic library loading"; + } +} +#endif + +/// Prevent messages from JeMalloc in the release build. +/// Some of these messages are non-actionable for the users, such as: +/// : Number of CPUs detected is not deterministic. Per-CPU arena disabled. +#if USE_JEMALLOC && defined(NDEBUG) && !defined(SANITIZER) +extern "C" void (*malloc_message)(void *, const char *s); +__attribute__((constructor(0))) void init_je_malloc_message() { malloc_message = [](void *, const char *){}; } +#endif + +/// This allows to implement assert to forbid initialization of a class in static constructors. +/// Usage: +/// +/// extern bool inside_main; +/// class C { C() { assert(inside_main); } }; +bool inside_main = false; + +int main(int argc_, char ** argv_) +{ + inside_main = true; + SCOPE_EXIT({ inside_main = false; }); + + /// PHDR cache is required for query profiler to work reliably + /// It also speed up exception handling, but exceptions from dynamically loaded libraries (dlopen) + /// will work only after additional call of this function. + /// Note: we forbid dlopen in our code. + updatePHDRCache(); + +#if !defined(USE_MUSL) + checkHarmfulEnvironmentVariables(argv_); +#endif + + /// This is used for testing. For example, + /// clickhouse-local should be able to run a simple query without throw/catch. + if (getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION")) // NOLINT(concurrency-mt-unsafe) + DB::terminate_on_any_exception = true; + + /// Reset new handler to default (that throws std::bad_alloc) + /// It is needed because LLVM library clobbers it. + std::set_new_handler(nullptr); + + std::vector argv(argv_, argv_ + argc_); + + /// Print a basic help if nothing was matched + MainFunc main_func = mainEntryClickHouseKeeper; + + if (isClickhouseApp("help", argv)) + { + main_func = printHelp; + } + else + { + for (auto & application : clickhouse_applications) + { + if (isClickhouseApp(application.first, argv)) + { + main_func = application.second; + break; + } + } + } + + int exit_code = main_func(static_cast(argv.size()), argv.data()); + +#if defined(SANITIZE_COVERAGE) + dumpCoverage(); +#endif + + return exit_code; +} diff --git a/programs/library-bridge/LibraryBridge.cpp b/programs/library-bridge/LibraryBridge.cpp index 8a07ca57104..d35e7a9b832 100644 --- a/programs/library-bridge/LibraryBridge.cpp +++ b/programs/library-bridge/LibraryBridge.cpp @@ -1,5 +1,7 @@ #include "LibraryBridge.h" +#include + int mainEntryClickHouseLibraryBridge(int argc, char ** argv) { DB::LibraryBridge app; diff --git a/programs/library-bridge/LibraryBridgeHandlers.cpp b/programs/library-bridge/LibraryBridgeHandlers.cpp index 8d116e537aa..53f4b31ca06 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.cpp +++ b/programs/library-bridge/LibraryBridgeHandlers.cpp @@ -6,6 +6,7 @@ #include "ExternalDictionaryLibraryHandlerFactory.h" #include +#include #include #include #include diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index b33e1595056..46b543e49e9 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -376,6 +376,7 @@ void LocalServer::setupUsers() " " " default" " default" + " 1" " " " " " " diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index da2466650a7..0715f358313 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -3,11 +3,12 @@ #include #include -#include -#include -#include +#include #include #include +#include +#include +#include #include #include diff --git a/programs/main.cpp b/programs/main.cpp index 61e2bc18ed7..02ea1471108 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -1,5 +1,3 @@ -#include -#include #include #include @@ -7,7 +5,6 @@ #include #include #include -#include #include #include /// pair @@ -16,6 +13,8 @@ #include "config.h" #include "config_tools.h" +#include +#include #include #include #include @@ -119,268 +118,6 @@ std::pair clickhouse_short_names[] = {"chc", "client"}, }; - -enum class InstructionFail : uint8_t -{ - NONE = 0, - SSE3 = 1, - SSSE3 = 2, - SSE4_1 = 3, - SSE4_2 = 4, - POPCNT = 5, - AVX = 6, - AVX2 = 7, - AVX512 = 8 -}; - -auto instructionFailToString(InstructionFail fail) -{ - switch (fail) - { -#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) - case InstructionFail::NONE: - ret("NONE"); - case InstructionFail::SSE3: - ret("SSE3"); - case InstructionFail::SSSE3: - ret("SSSE3"); - case InstructionFail::SSE4_1: - ret("SSE4.1"); - case InstructionFail::SSE4_2: - ret("SSE4.2"); - case InstructionFail::POPCNT: - ret("POPCNT"); - case InstructionFail::AVX: - ret("AVX"); - case InstructionFail::AVX2: - ret("AVX2"); - case InstructionFail::AVX512: - ret("AVX512"); -#undef ret - } -} - - -sigjmp_buf jmpbuf; - -[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) -{ - siglongjmp(jmpbuf, 1); -} - -/// Check if necessary SSE extensions are available by trying to execute some sse instructions. -/// If instruction is unavailable, SIGILL will be sent by kernel. -void checkRequiredInstructionsImpl(volatile InstructionFail & fail) -{ -#if defined(__SSE3__) - fail = InstructionFail::SSE3; - __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSSE3__) - fail = InstructionFail::SSSE3; - __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); - -#endif - -#if defined(__SSE4_1__) - fail = InstructionFail::SSE4_1; - __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSE4_2__) - fail = InstructionFail::SSE4_2; - __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); -#endif - - /// Defined by -msse4.2 -#if defined(__POPCNT__) - fail = InstructionFail::POPCNT; - { - uint64_t a = 0; - uint64_t b = 0; - __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); - } -#endif - -#if defined(__AVX__) - fail = InstructionFail::AVX; - __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX2__) - fail = InstructionFail::AVX2; - __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX512__) - fail = InstructionFail::AVX512; - __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); -#endif - - fail = InstructionFail::NONE; -} - -/// Macros to avoid using strlen(), since it may fail if SSE is not supported. -#define writeError(data) do \ - { \ - static_assert(__builtin_constant_p(data)); \ - if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ - _Exit(1); \ - } while (false) - -/// Check SSE and others instructions availability. Calls exit on fail. -/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. -void checkRequiredInstructions() -{ - struct sigaction sa{}; - struct sigaction sa_old{}; - sa.sa_sigaction = sigIllCheckHandler; - sa.sa_flags = SA_SIGINFO; - auto signal = SIGILL; - if (sigemptyset(&sa.sa_mask) != 0 - || sigaddset(&sa.sa_mask, signal) != 0 - || sigaction(signal, &sa, &sa_old) != 0) - { - /// You may wonder about strlen. - /// Typical implementation of strlen is using SSE4.2 or AVX2. - /// But this is not the case because it's compiler builtin and is executed at compile time. - - writeError("Can not set signal handler\n"); - _Exit(1); - } - - volatile InstructionFail fail = InstructionFail::NONE; - - if (sigsetjmp(jmpbuf, 1)) - { - writeError("Instruction check fail. The CPU does not support "); - if (!std::apply(writeRetry, instructionFailToString(fail))) - _Exit(1); - writeError(" instruction set.\n"); - _Exit(1); - } - - checkRequiredInstructionsImpl(fail); - - if (sigaction(signal, &sa_old, nullptr)) - { - writeError("Can not set signal handler\n"); - _Exit(1); - } -} - -struct Checker -{ - Checker() - { - checkRequiredInstructions(); - } -} checker -#ifndef OS_DARWIN - __attribute__((init_priority(101))) /// Run before other static initializers. -#endif -; - - -#if !defined(USE_MUSL) -/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. -void checkHarmfulEnvironmentVariables(char ** argv) -{ - std::initializer_list harmful_env_variables = { - /// The list is a selection from "man ld-linux". - "LD_PRELOAD", - "LD_LIBRARY_PATH", - "LD_ORIGIN_PATH", - "LD_AUDIT", - "LD_DYNAMIC_WEAK", - /// The list is a selection from "man dyld" (osx). - "DYLD_LIBRARY_PATH", - "DYLD_FALLBACK_LIBRARY_PATH", - "DYLD_VERSIONED_LIBRARY_PATH", - "DYLD_INSERT_LIBRARIES", - }; - - bool require_reexec = false; - for (const auto * var : harmful_env_variables) - { - if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) - { - /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful - if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently - { - fmt::print(stderr, "Cannot override {} environment variable", var); - _exit(1); - } - require_reexec = true; - } - } - - if (require_reexec) - { - /// Use execvp() over execv() to search in PATH. - /// - /// This should be safe, since: - /// - if argv[0] is relative path - it is OK - /// - if argv[0] has only basename, the it will search in PATH, like shell will do. - /// - /// Also note, that this (search in PATH) because there is no easy and - /// portable way to get absolute path of argv[0]. - /// - on linux there is /proc/self/exec and AT_EXECFN - /// - but on other OSes there is no such thing (especially on OSX). - /// - /// And since static linking will be done someday anyway, - /// let's not pollute the code base with special cases. - int error = execvp(argv[0], argv); - _exit(error); - } -} -#endif - - -#if defined(SANITIZE_COVERAGE) -__attribute__((no_sanitize("coverage"))) -void dumpCoverage() -{ - /// A user can request to dump the coverage information into files at exit. - /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, - /// that cannot introspect it with SQL functions at runtime. - - /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' - /// containing the list of addresses of covered . - - /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. - - if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) - { - auto dump = [](const std::string & name, auto span) - { - /// Write only non-zeros. - std::vector data; - data.reserve(span.size()); - for (auto addr : span) - if (addr) - data.push_back(addr); - - int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); - if (-1 == fd) - { - writeError("Cannot open a file to write the coverage data\n"); - } - else - { - if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) - writeError("Cannot write the coverage data to a file\n"); - if (0 != ::close(fd)) - writeError("Cannot close the file with coverage data\n"); - } - }; - - dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); - } -} -#endif - } bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index 5ff985b3d12..a738d5e8c12 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -2,6 +2,7 @@ #if USE_ODBC +#include #include #include #include diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp index cf5acdc4534..2f00e1171bb 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp +++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include "getIdentifierQuote.h" #include "validateODBCConnectionString.h" #include "ODBCPooledConnectionFactory.h" diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index 2cf1576ccd7..404f66eab3b 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/programs/odbc-bridge/ODBCBridge.cpp b/programs/odbc-bridge/ODBCBridge.cpp index e91cc3158df..155e26c8a27 100644 --- a/programs/odbc-bridge/ODBCBridge.cpp +++ b/programs/odbc-bridge/ODBCBridge.cpp @@ -1,5 +1,7 @@ #include "ODBCBridge.h" +#include + int mainEntryClickHouseODBCBridge(int argc, char ** argv) { DB::ODBCBridge app; diff --git a/programs/odbc-bridge/ODBCSource.cpp b/programs/odbc-bridge/ODBCSource.cpp index 940970f36ab..41a9813ce50 100644 --- a/programs/odbc-bridge/ODBCSource.cpp +++ b/programs/odbc-bridge/ODBCSource.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -47,9 +48,17 @@ Chunk ODBCSource::generate() for (int idx = 0; idx < result.columns(); ++idx) { const auto & sample = description.sample_block.getByPosition(idx); - if (!result.is_null(idx)) - insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx); + { + if (columns[idx]->isNullable()) + { + ColumnNullable & column_nullable = assert_cast(*columns[idx]); + insertValue(column_nullable.getNestedColumn(), removeNullable(sample.type), description.types[idx].first, result, idx); + column_nullable.getNullMapData().emplace_back(0); + } + else + insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx); + } else insertDefaultValue(*columns[idx], *sample.column); } diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp index c7025ca4311..dfb4333240b 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.cpp +++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp @@ -2,8 +2,10 @@ #if USE_ODBC +#include #include #include +#include #include #include #include diff --git a/programs/self-extracting/CMakeLists.txt b/programs/self-extracting/CMakeLists.txt index 4b6dd07f618..32b686d40dd 100644 --- a/programs/self-extracting/CMakeLists.txt +++ b/programs/self-extracting/CMakeLists.txt @@ -10,9 +10,24 @@ else () set (COMPRESSOR "${PROJECT_BINARY_DIR}/utils/self-extracting-executable/compressor") endif () -add_custom_target (self-extracting ALL +add_custom_target (self-extracting-server ALL ${CMAKE_COMMAND} -E remove clickhouse clickhouse-stripped COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse ../clickhouse COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse-stripped ../clickhouse-stripped DEPENDS clickhouse clickhouse-stripped compressor ) + +set(self_extracting_deps "self-extracting-server") + +if (BUILD_STANDALONE_KEEPER) + add_custom_target (self-extracting-keeper ALL + ${CMAKE_COMMAND} -E remove clickhouse-keeper + COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse-keeper ../clickhouse-keeper + DEPENDS compressor clickhouse-keeper + ) + list(APPEND self_extracting_deps "self-extracting-keeper") +endif() + +add_custom_target (self-extracting ALL + DEPENDS ${self_extracting_deps} +) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 4cb3b5f45c7..053ddaf8d8b 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,7 @@ #include #include #include +#include #include #include #include @@ -133,10 +135,6 @@ # include #endif -#if USE_JEMALLOC -# include -#endif - #if USE_AZURE_BLOB_STORAGE # include # include @@ -176,34 +174,10 @@ namespace ProfileEvents namespace fs = std::filesystem; -#if USE_JEMALLOC -static bool jemallocOptionEnabled(const char *name) -{ - bool value; - size_t size = sizeof(value); - - if (mallctl(name, reinterpret_cast(&value), &size, /* newp= */ nullptr, /* newlen= */ 0)) - throw Poco::SystemException("mallctl() failed"); - - return value; -} -#else -static bool jemallocOptionEnabled(const char *) { return false; } -#endif - int mainEntryClickHouseServer(int argc, char ** argv) { DB::Server app; - if (jemallocOptionEnabled("opt.background_thread")) - { - LOG_ERROR(&app.logger(), - "jemalloc.background_thread was requested, " - "however ClickHouse uses percpu_arena and background_thread most likely will not give any benefits, " - "and also background_thread is not compatible with ClickHouse watchdog " - "(that can be disabled with CLICKHOUSE_WATCHDOG_ENABLE=0)"); - } - /// Do not fork separate process from watchdog if we attached to terminal. /// Otherwise it breaks gdb usage. /// Can be overridden by environment variable (cannot use server config at this moment). @@ -614,6 +588,54 @@ static void sanityChecks(Server & server) } } +void loadStartupScripts(const Poco::Util::AbstractConfiguration & config, ContextMutablePtr context, Poco::Logger * log) +{ + try + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys("startup_scripts", keys); + + SetResultDetailsFunc callback; + for (const auto & key : keys) + { + std::string full_prefix = "startup_scripts." + key; + + if (config.has(full_prefix + ".condition")) + { + auto condition = config.getString(full_prefix + ".condition"); + auto condition_read_buffer = ReadBufferFromString(condition); + auto condition_write_buffer = WriteBufferFromOwnString(); + + LOG_DEBUG(log, "Checking startup query condition `{}`", condition); + executeQuery(condition_read_buffer, condition_write_buffer, true, context, callback, QueryFlags{ .internal = true }, std::nullopt, {}); + + auto result = condition_write_buffer.str(); + + if (result != "1\n" && result != "true\n") + { + if (result != "0\n" && result != "false\n") + context->addWarningMessage(fmt::format("The condition query returned `{}`, which can't be interpreted as a boolean (`0`, `false`, `1`, `true`). Will skip this query.", result)); + + continue; + } + + LOG_DEBUG(log, "Condition is true, will execute the query next"); + } + + auto query = config.getString(full_prefix + ".query"); + auto read_buffer = ReadBufferFromString(query); + auto write_buffer = WriteBufferFromOwnString(); + + LOG_DEBUG(log, "Executing query `{}`", query); + executeQuery(read_buffer, write_buffer, true, context, callback, QueryFlags{ .internal = true }, std::nullopt, {}); + } + } + catch (...) + { + tryLogCurrentException(log, "Failed to parse startup scripts file"); + } +} + static void initializeAzureSDKLogger( [[ maybe_unused ]] const ServerSettings & server_settings, [[ maybe_unused ]] int server_logs_level) @@ -653,9 +675,35 @@ static void initializeAzureSDKLogger( #endif } +#if defined(SANITIZER) +static std::vector getSanitizerNames() +{ + std::vector names; + +#if defined(ADDRESS_SANITIZER) + names.push_back("address"); +#endif +#if defined(THREAD_SANITIZER) + names.push_back("thread"); +#endif +#if defined(MEMORY_SANITIZER) + names.push_back("memory"); +#endif +#if defined(UNDEFINED_BEHAVIOR_SANITIZER) + names.push_back("undefined behavior"); +#endif + + return names; +} +#endif + int Server::main(const std::vector & /*args*/) try { +#if USE_JEMALLOC + setJemallocBackgroundThreads(true); +#endif + Stopwatch startup_watch; Poco::Logger * log = &logger(); @@ -739,7 +787,17 @@ try global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable."); #if defined(SANITIZER) - global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); + auto sanitizers = getSanitizerNames(); + + String log_message; + if (sanitizers.empty()) + log_message = "sanitizer"; + else if (sanitizers.size() == 1) + log_message = fmt::format("{} sanitizer", sanitizers.front()); + else + log_message = fmt::format("sanitizers ({})", fmt::join(sanitizers, ", ")); + + global_context->addWarningMessage(fmt::format("Server was built with {}. It will work slowly.", log_message)); #endif #if defined(SANITIZE_COVERAGE) || WITH_COVERAGE @@ -1981,6 +2039,11 @@ try /// otherwise there is a race condition between the system database initialization /// and creation of new tables in the database. waitLoad(TablesLoaderForegroundPoolId, system_startup_tasks); + + /// Startup scripts can depend on the system log tables. + if (config().has("startup_scripts") && !server_settings.prepare_system_log_tables_on_startup.changed) + global_context->setServerSetting("prepare_system_log_tables_on_startup", true); + /// After attaching system databases we can initialize system log. global_context->initializeSystemLogs(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); @@ -2129,6 +2192,9 @@ try load_metadata_tasks.clear(); load_metadata_tasks.shrink_to_fit(); + if (config().has("startup_scripts")) + loadStartupScripts(config(), global_context, log); + { std::lock_guard lock(servers_lock); for (auto & server : servers) diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index b21d4b86314..45f988f7b1e 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -506,6 +506,14 @@ let user = 'default'; let password = ''; let add_http_cors_header = (location.protocol != 'file:'); +const current_url = new URL(window.location); +/// Substitute user name if it's specified in the query string +const user_from_url = current_url.searchParams.get('user'); +if (user_from_url) { + user = user_from_url; +} + + const errorCodeMessageMap = { 516: 'Error authenticating with database. Please check your connection params and try again.' } diff --git a/programs/server/play.html b/programs/server/play.html index 507a96382a7..9590a65524c 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -516,6 +516,9 @@ /// Save query in history only if it is different. let previous_query = ''; + /// Start of the last query + let last_query_start = 0; + const current_url = new URL(window.location); const opened_locally = location.protocol == 'file:'; @@ -567,6 +570,8 @@ '&password=' + encodeURIComponent(password) } + last_query_start = performance.now(); + const xhr = new XMLHttpRequest; xhr.open('POST', url, true); @@ -579,7 +584,8 @@ if (posted_request_num != request_num) { return; } else if (this.readyState === XMLHttpRequest.DONE) { - renderResponse(this.status, this.response); + const elapsed_msec = performance.now() - last_query_start; + renderResponse(this.status, this.response, elapsed_msec); /// The query is saved in browser history (in state JSON object) /// as well as in URL fragment identifier. @@ -587,7 +593,8 @@ const state = { query: query, status: this.status, - response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit. + response: this.response.length > 100000 ? null : this.response, /// Lower than the browser's limit. + elapsed_msec: elapsed_msec, }; const title = "ClickHouse Query: " + query; @@ -617,7 +624,7 @@ xhr.send(query); } - function renderResponse(status, response) { + function renderResponse(status, response, elapsed_msec) { document.getElementById('hourglass').style.display = 'none'; if (status === 200) { @@ -632,6 +639,7 @@ renderChart(json); } else { renderUnparsedResult(response); + stats.innerText = `Elapsed (client-side): ${(elapsed_msec / 1000).toFixed(3)} sec.`; } document.getElementById('check-mark').style.display = 'inline'; } else { @@ -651,7 +659,7 @@ clear(); return; } - renderResponse(event.state.status, event.state.response); + renderResponse(event.state.status, event.state.response, event.state.elapsed_msec); }; if (window.location.hash) { diff --git a/pyproject.toml b/pyproject.toml index 279d077a695..c89d46c0929 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,8 @@ src_paths = ["src", "tests/ci", "tests/sqllogic"] [tool.pylint.'MESSAGES CONTROL'] # pytest.mark.parametrize is not callable (not-callable) disable = ''' + pointless-string-statement, + line-too-long, missing-docstring, too-few-public-methods, invalid-name, @@ -35,10 +37,10 @@ disable = ''' broad-except, bare-except, no-else-return, - global-statement + global-statement, + f-string-without-interpolation, ''' [tool.pylint.SIMILARITIES] # due to SQL min-similarity-lines=1000 - diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index 353358fac65..95a467bbbe5 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index 6555ae63128..b5c6440a69c 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -6,6 +6,7 @@ #include #include #include +#include static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000; diff --git a/src/Analyzer/InDepthQueryTreeVisitor.h b/src/Analyzer/InDepthQueryTreeVisitor.h index 62ddc06659c..67e3b75d8b5 100644 --- a/src/Analyzer/InDepthQueryTreeVisitor.h +++ b/src/Analyzer/InDepthQueryTreeVisitor.h @@ -3,7 +3,6 @@ #include #include -#include #include #include diff --git a/src/Analyzer/Passes/AggregateFunctionOfGroupByKeysPass.cpp b/src/Analyzer/Passes/AggregateFunctionOfGroupByKeysPass.cpp index a419bbcc133..b68635ab953 100644 --- a/src/Analyzer/Passes/AggregateFunctionOfGroupByKeysPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionOfGroupByKeysPass.cpp @@ -10,6 +10,8 @@ #include #include +#include + namespace DB { diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index e6798a792dd..6578a6c5bd7 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -9,6 +9,9 @@ #include #include #include +#include + +#include namespace DB { @@ -164,32 +167,15 @@ private: auto aggregate_function_clone = aggregate_function->clone(); auto & aggregate_function_clone_typed = aggregate_function_clone->as(); + aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument }; - resolveAggregateFunctionNode(aggregate_function_clone_typed, arithmetic_function_clone_argument, result_aggregate_function_name); + resolveAggregateFunctionNodeByName(aggregate_function_clone_typed, result_aggregate_function_name); arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone); - resolveOrdinaryFunctionNode(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName()); + resolveOrdinaryFunctionNodeByName(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName(), getContext()); return arithmetic_function_clone; } - - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - - static void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name) - { - auto function_aggregate_function = function_node.getAggregateFunction(); - - AggregateFunctionProperties properties; - auto action = NullsAction::EMPTY; - auto aggregate_function = AggregateFunctionFactory::instance().get( - aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } }; } diff --git a/src/Analyzer/Passes/ArrayExistsToHasPass.cpp b/src/Analyzer/Passes/ArrayExistsToHasPass.cpp index 62db502e1dc..bf4c1232d79 100644 --- a/src/Analyzer/Passes/ArrayExistsToHasPass.cpp +++ b/src/Analyzer/Passes/ArrayExistsToHasPass.cpp @@ -11,6 +11,8 @@ #include #include +#include + namespace DB { diff --git a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp index 70aa1a41548..f7dd542dbf4 100644 --- a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp +++ b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp @@ -8,6 +8,8 @@ #include #include +#include + namespace DB { diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index ebefc12ae53..180470952cd 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB { @@ -18,19 +19,18 @@ namespace DB namespace { -class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitor +class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitorWithContext { public: - explicit ComparisonTupleEliminationPassVisitor(ContextPtr context_) - : context(std::move(context_)) - {} + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; static bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child) { return child->getNodeType() != QueryTreeNodeType::TABLE_FUNCTION; } - void visitImpl(QueryTreeNodePtr & node) const + void enterImpl(QueryTreeNodePtr & node) const { auto * function_node = node->as(); if (!function_node) @@ -171,13 +171,13 @@ private: { auto result_function = std::make_shared("and"); result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions); - resolveOrdinaryFunctionNode(*result_function, result_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), getContext()); if (comparison_function_name == "notEquals") { auto not_function = std::make_shared("not"); not_function->getArguments().getNodes().push_back(std::move(result_function)); - resolveOrdinaryFunctionNode(*not_function, not_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), getContext()); result_function = std::move(not_function); } @@ -197,18 +197,10 @@ private: comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument)); comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument)); - resolveOrdinaryFunctionNode(*comparison_function, comparison_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), getContext()); return comparison_function; } - - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, context); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - - ContextPtr context; }; } diff --git a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp index eb897ef8746..2b2ac95d7b9 100644 --- a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp +++ b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp @@ -22,6 +22,8 @@ #include #include +#include + namespace DB { diff --git a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp index 5951e8fc5ea..f66d092429f 100644 --- a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp +++ b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp @@ -10,6 +10,8 @@ #include #include +#include + #include #include diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp index 3307c440f42..3f9185a07ca 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -9,6 +9,9 @@ #include #include #include +#include + +#include namespace DB { @@ -77,11 +80,9 @@ public: /// Replace `countDistinct` of initial query into `count` auto result_type = function_node->getResultType(); - AggregateFunctionProperties properties; - auto action = NullsAction::EMPTY; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties); - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count"); } }; diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp index 3e2a2055fdb..eb615879893 100644 --- a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp +++ b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp @@ -9,13 +9,14 @@ #include #include #include +#include #include #include #include #include -#include +#include namespace DB diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 15ac8d642a4..b968f43c6a6 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -16,6 +17,11 @@ #include #include #include +#include +#include +#include + +#include namespace DB { @@ -23,202 +29,410 @@ namespace DB namespace { -class FunctionToSubcolumnsVisitor : public InDepthQueryTreeVisitorWithContext +struct ColumnContext +{ + NameAndTypePair column; + QueryTreeNodePtr column_source; + ContextPtr context; +}; + +using NodeToSubcolumnTransformer = std::function; + +void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx) +{ + /// Replace `length(argument)` with `argument.size0` + /// `argument` may be Array or Map. + + NameAndTypePair column{ctx.column.name + ".size0", std::make_shared()}; + node = std::make_shared(column, ctx.column_source); +} + +template +void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) +{ + /// Replace `empty(argument)` with `equals(argument.size0, 0)` if positive + /// Replace `notEmpty(argument)` with `notEquals(argument.size0, 0)` if not positive + /// `argument` may be Array or Map. + + NameAndTypePair column{ctx.column.name + ".size0", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + function_arguments_nodes.clear(); + function_arguments_nodes.push_back(std::make_shared(column, ctx.column_source)); + function_arguments_nodes.push_back(std::make_shared(static_cast(0))); + + const auto * function_name = positive ? "equals" : "notEquals"; + resolveOrdinaryFunctionNodeByName(function_node, function_name, ctx.context); +} + +String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple) +{ + if (value.getType() == Field::Types::String) + return value.get(); + + if (value.getType() == Field::Types::UInt64) + return data_type_tuple.getNameByPosition(value.get()); + + return ""; +} + +String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &) +{ + if (value.getType() == Field::Types::String) + return value.get(); + + return ""; +} + +template +void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) +{ + /// Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` with `tuple_argument.column_name`. + /// Replace `variantElement(variant_argument, string_literal)` with `variant_argument.column_name`. + + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + if (function_arguments_nodes.size() != 2) + return; + + const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); + if (!second_argument_constant_node) + return; + + const auto & data_type_concrete = assert_cast(*ctx.column.type); + auto subcolumn_name = getSubcolumnNameForElement(second_argument_constant_node->getValue(), data_type_concrete); + + if (subcolumn_name.empty()) + return; + + NameAndTypePair column{ctx.column.name + "." + subcolumn_name, function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); +} + +std::map, NodeToSubcolumnTransformer> node_transformers = +{ + { + {TypeIndex::Array, "length"}, optimizeFunctionLength, + }, + { + {TypeIndex::Array, "empty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Array, "notEmpty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "length"}, optimizeFunctionLength, + }, + { + {TypeIndex::Map, "empty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "notEmpty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "mapKeys"}, + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) + { + /// Replace `mapKeys(map_argument)` with `map_argument.keys` + NameAndTypePair column{ctx.column.name + ".keys", function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); + }, + }, + { + {TypeIndex::Map, "mapValues"}, + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) + { + /// Replace `mapValues(map_argument)` with `map_argument.values` + NameAndTypePair column{ctx.column.name + ".values", function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); + }, + }, + { + {TypeIndex::Map, "mapContains"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) + { + /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)` + const auto & data_type_map = assert_cast(*ctx.column.type); + + NameAndTypePair column{ctx.column.name + ".keys", std::make_shared(data_type_map.getKeyType())}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + auto has_function_argument = std::make_shared(column, ctx.column_source); + function_arguments_nodes[0] = std::move(has_function_argument); + + resolveOrdinaryFunctionNodeByName(function_node, "has", ctx.context); + }, + }, + { + {TypeIndex::Nullable, "count"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) + { + /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + auto new_column_node = std::make_shared(column, ctx.column_source); + auto function_node_not = std::make_shared("not"); + + function_node_not->getArguments().getNodes().push_back(std::move(new_column_node)); + resolveOrdinaryFunctionNodeByName(*function_node_not, "not", ctx.context); + + function_arguments_nodes = {std::move(function_node_not)}; + resolveAggregateFunctionNodeByName(function_node, "sum"); + }, + }, + { + {TypeIndex::Nullable, "isNull"}, + [](QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx) + { + /// Replace `isNull(nullable_argument)` with `nullable_argument.null` + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; + node = std::make_shared(column, ctx.column_source); + }, + }, + { + {TypeIndex::Nullable, "isNotNull"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) + { + /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)` + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + function_arguments_nodes = {std::make_shared(column, ctx.column_source)}; + resolveOrdinaryFunctionNodeByName(function_node, "not", ctx.context); + }, + }, + { + {TypeIndex::Tuple, "tupleElement"}, optimizeTupleOrVariantElement, + }, + { + {TypeIndex::Variant, "variantElement"}, optimizeTupleOrVariantElement, + }, +}; + +std::tuple getTypedNodesForOptimization(const QueryTreeNodePtr & node) +{ + auto * function_node = node->as(); + if (!function_node) + return {}; + + auto & function_arguments_nodes = function_node->getArguments().getNodes(); + if (function_arguments_nodes.empty() || function_arguments_nodes.size() > 2) + return {}; + + auto * first_argument_column_node = function_arguments_nodes.front()->as(); + if (!first_argument_column_node || first_argument_column_node->getColumnName() == "__grouping_set") + return {}; + + auto column_source = first_argument_column_node->getColumnSource(); + auto * table_node = column_source->as(); + if (!table_node) + return {}; + + const auto & storage = table_node->getStorage(); + const auto & storage_snapshot = table_node->getStorageSnapshot(); + auto column = first_argument_column_node->getColumn(); + + if (!storage->supportsOptimizationToSubcolumns() || storage->isVirtualColumn(column.name, storage_snapshot->metadata)) + return {}; + + auto column_in_table = storage_snapshot->tryGetColumn(GetColumnsOptions::All, column.name); + if (!column_in_table || !column_in_table->type->equals(*column.type)) + return {}; + + return std::make_tuple(function_node, first_argument_column_node, table_node); +} + +/// First pass collects info about identifiers to determine which identifiers are allowed to optimize. +class FunctionToSubcolumnsVisitorFirstPass : public InDepthQueryTreeVisitorWithContext { public: - using Base = InDepthQueryTreeVisitorWithContext; + using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; + void enterImpl(const QueryTreeNodePtr & node) + { + if (!getSettings().optimize_functions_to_subcolumns) + return; + + if (auto * table_node = node->as()) + { + enterImpl(*table_node); + return; + } + + if (auto * column_node = node->as()) + { + enterImpl(*column_node); + return; + } + + auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node); + if (function_node && first_argument_node && table_node) + { + enterImpl(*function_node, *first_argument_node, *table_node); + return; + } + + if (const auto * join_node = node->as()) + { + can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().join_use_nulls; + return; + } + + if (const auto * query_node = node->as()) + { + if (query_node->isGroupByWithCube() || query_node->isGroupByWithRollup() || query_node->isGroupByWithGroupingSets()) + can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().group_by_use_nulls; + return; + } + } + + std::unordered_set getIdentifiersToOptimize() const + { + if (can_wrap_result_columns_with_nullable) + { + /// Do not optimize if we have JOIN with setting join_use_null. + /// Do not optimize if we have GROUP BY WITH ROLLUP/CUBE/GROUPING SETS with setting group_by_use_nulls. + /// It may change the behaviour if subcolumn can be converted + /// to Nullable while the original column cannot (e.g. for Array type). + return {}; + } + + /// Do not optimize if full column is requested in other context. + /// It doesn't make sense because it doesn't reduce amount of read data + /// and optimized functions are not computation heavy. But introducing + /// new identifier complicates query analysis and may break it. + /// + /// E.g. query: + /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) + /// may be optimized to incorrect query: + /// SELECT n FROM table GROUP BY n HAVING not(n.null) + /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) + /// + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. + + std::unordered_set identifiers_to_optimize; + for (const auto & [identifier, count] : optimized_identifiers_count) + { + if (all_key_columns.contains(identifier)) + continue; + + auto it = identifiers_count.find(identifier); + if (it != identifiers_count.end() && it->second == count) + identifiers_to_optimize.insert(identifier); + } + + return identifiers_to_optimize; + } + +private: + std::unordered_set all_key_columns; + std::unordered_map identifiers_count; + std::unordered_map optimized_identifiers_count; + + NameSet processed_tables; + bool can_wrap_result_columns_with_nullable = false; + + void enterImpl(const TableNode & table_node) + { + auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); + if (processed_tables.emplace(table_name).second) + return; + + auto add_key_columns = [&](const auto & key_columns) + { + for (const auto & column_name : key_columns) + { + Identifier identifier({table_name, column_name}); + all_key_columns.insert(identifier); + } + }; + + const auto & metadata_snapshot = table_node.getStorageSnapshot()->metadata; + const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey(); + const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); + + add_key_columns(primary_key_columns); + add_key_columns(partition_key_columns); + + for (const auto & index : metadata_snapshot->getSecondaryIndices()) + { + const auto & index_columns = index.expression->getRequiredColumns(); + add_key_columns(index_columns); + } + } + + void enterImpl(const ColumnNode & column_node) + { + if (column_node.getColumnName() == "__grouping_set") + return; + + auto column_source = column_node.getColumnSource(); + auto * table_node = column_source->as(); + if (!table_node) + return; + + auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column_node.getColumnName()}); + + ++identifiers_count[qualified_name]; + } + + void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node) + { + /// For queries with FINAL converting function to subcolumn may alter + /// special merging algorithms and produce wrong result of query. + if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal()) + return; + + const auto & column = first_argument_column_node.getColumn(); + auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column.name}); + + if (node_transformers.contains({column.type->getTypeId(), function_node.getFunctionName()})) + ++optimized_identifiers_count[qualified_name]; + } +}; + +/// Second pass optimizes functions to subcolumns for allowed identifiers. +class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext +{ +private: + std::unordered_set identifiers_to_optimize; + +public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + + FunctionToSubcolumnsVisitorSecondPass(ContextPtr context_, std::unordered_set identifiers_to_optimize_) + : Base(std::move(context_)), identifiers_to_optimize(std::move(identifiers_to_optimize_)) + { + } + void enterImpl(QueryTreeNodePtr & node) const { if (!getSettings().optimize_functions_to_subcolumns) return; - auto * function_node = node->as(); - if (!function_node) - return; - - auto & function_arguments_nodes = function_node->getArguments().getNodes(); - size_t function_arguments_nodes_size = function_arguments_nodes.size(); - - if (function_arguments_nodes.empty() || function_arguments_nodes_size > 2) - return; - - auto * first_argument_column_node = function_arguments_nodes.front()->as(); - - if (!first_argument_column_node) - return; - - if (first_argument_column_node->getColumnName() == "__grouping_set") - return; - - auto column_source = first_argument_column_node->getColumnSource(); - auto * table_node = column_source->as(); - - if (!table_node) - return; - - const auto & storage = table_node->getStorage(); - if (!storage->supportsSubcolumns()) + auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node); + if (!function_node || !first_argument_column_node || !table_node) return; auto column = first_argument_column_node->getColumn(); - WhichDataType column_type(column.type); + auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); - const auto & function_name = function_node->getFunctionName(); + Identifier qualified_name({table_name, column.name}); + if (!identifiers_to_optimize.contains(qualified_name)) + return; - if (function_arguments_nodes_size == 1) + auto transformer_it = node_transformers.find({column.type->getTypeId(), function_node->getFunctionName()}); + if (transformer_it != node_transformers.end()) { - if (column_type.isArray()) - { - if (function_name == "length") - { - /// Replace `length(array_argument)` with `array_argument.size0` - column.name += ".size0"; - column.type = std::make_shared(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "empty") - { - /// Replace `empty(array_argument)` with `equals(array_argument.size0, 0)` - column.name += ".size0"; - column.type = std::make_shared(); - - function_arguments_nodes.clear(); - function_arguments_nodes.push_back(std::make_shared(column, column_source)); - function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - - resolveOrdinaryFunctionNode(*function_node, "equals"); - } - else if (function_name == "notEmpty") - { - /// Replace `notEmpty(array_argument)` with `notEquals(array_argument.size0, 0)` - column.name += ".size0"; - column.type = std::make_shared(); - - function_arguments_nodes.clear(); - function_arguments_nodes.push_back(std::make_shared(column, column_source)); - function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - - resolveOrdinaryFunctionNode(*function_node, "notEquals"); - } - } - else if (column_type.isNullable()) - { - if (function_name == "isNull") - { - /// Replace `isNull(nullable_argument)` with `nullable_argument.null` - column.name += ".null"; - column.type = std::make_shared(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "isNotNull") - { - /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)` - column.name += ".null"; - column.type = std::make_shared(); - - function_arguments_nodes = {std::make_shared(column, column_source)}; - - resolveOrdinaryFunctionNode(*function_node, "not"); - } - } - else if (column_type.isMap()) - { - if (function_name == "mapKeys") - { - /// Replace `mapKeys(map_argument)` with `map_argument.keys` - column.name += ".keys"; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "mapValues") - { - /// Replace `mapValues(map_argument)` with `map_argument.values` - column.name += ".values"; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - } + ColumnContext ctx{std::move(column), first_argument_column_node->getColumnSource(), getContext()}; + transformer_it->second(node, *function_node, ctx); } - else - { - const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); - - if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) - { - /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` - * with `tuple_argument.column_name`. - */ - const auto & tuple_element_constant_value = second_argument_constant_node->getValue(); - const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType(); - - const auto & data_type_tuple = assert_cast(*column.type); - - String subcolumn_name; - - if (tuple_element_constant_value_type == Field::Types::String) - { - subcolumn_name = tuple_element_constant_value.get(); - } - else if (tuple_element_constant_value_type == Field::Types::UInt64) - { - auto tuple_column_index = tuple_element_constant_value.get(); - subcolumn_name = data_type_tuple.getNameByPosition(tuple_column_index); - } - else - { - return; - } - - column.name += '.'; - column.name += subcolumn_name; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "variantElement" && isVariant(column_type) && second_argument_constant_node) - { - /// Replace `variantElement(variant_argument, type_name)` with `variant_argument.type_name`. - const auto & variant_element_constant_value = second_argument_constant_node->getValue(); - String subcolumn_name; - - if (variant_element_constant_value.getType() != Field::Types::String) - return; - - subcolumn_name = variant_element_constant_value.get(); - - column.name += '.'; - column.name += subcolumn_name; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "mapContains" && column_type.isMap()) - { - const auto & data_type_map = assert_cast(*column.type); - - /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)` - column.name += ".keys"; - column.type = std::make_shared(data_type_map.getKeyType()); - - auto has_function_argument = std::make_shared(column, column_source); - function_arguments_nodes[0] = std::move(has_function_argument); - - resolveOrdinaryFunctionNode(*function_node, "has"); - } - } - } - -private: - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); } }; @@ -226,8 +440,15 @@ private: void FunctionToSubcolumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { - FunctionToSubcolumnsVisitor visitor(context); - visitor.visit(query_tree_node); + FunctionToSubcolumnsVisitorFirstPass first_visitor(context); + first_visitor.visit(query_tree_node); + auto identifiers_to_optimize = first_visitor.getIdentifiersToOptimize(); + + if (identifiers_to_optimize.empty()) + return; + + FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize)); + second_visitor.visit(query_tree_node); } } diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp index ac4678fb362..0175e304a2b 100644 --- a/src/Analyzer/Passes/FuseFunctionsPass.cpp +++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp b/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp index e259ffa64a6..91f06183523 100644 --- a/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp +++ b/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp @@ -1,6 +1,7 @@ #include #include +#include #include diff --git a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp index beb2247607e..cff98adb6af 100644 --- a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp +++ b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index 29b392a0951..d966f129d08 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index ac221bd66e7..854697bca9f 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -8,8 +8,10 @@ #include #include #include +#include #include +#include namespace DB { @@ -26,12 +28,103 @@ static constexpr std::array boolean_functions{ "like"sv, "notLike"sv, "ilike"sv, "notILike"sv, "empty"sv, "notEmpty"sv, "not"sv, "and"sv, "or"sv}; -static bool isBooleanFunction(const String & func_name) + +bool isBooleanFunction(const String & func_name) { return std::any_of( boolean_functions.begin(), boolean_functions.end(), [&](const auto boolean_func) { return func_name == boolean_func; }); } +bool isNodeFunction(const QueryTreeNodePtr & node, const String & func_name) +{ + if (const auto * function_node = node->as()) + return function_node->getFunctionName() == func_name; + return false; +} + +QueryTreeNodePtr getFunctionArgument(const QueryTreeNodePtr & node, size_t idx) +{ + if (const auto * function_node = node->as()) + { + const auto & args = function_node->getArguments().getNodes(); + if (idx < args.size()) + return args[idx]; + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected '{}' to be a function with at least {} arguments", node->formatASTForErrorMessage(), idx + 1); +} + +QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes) +{ + for (const auto & node : nodes) + { + const auto * function_node = node->as(); + if (function_node && function_node->getFunctionName() == "equals" && + function_node->getArguments().getNodes().size() == 2) + { + return node; + } + } + return nullptr; +} + +bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value) +{ + const auto * constant_node = node->as(); + if (!constant_node || !constant_node->getResultType()->equals(DataTypeUInt8())) + return false; + + UInt64 constant_value; + return (constant_node->getValue().tryGet(constant_value) && constant_value == expected_value); +} + +/// Returns true if expression consists of only conjunctions of functions with the specified name or true constants +bool isOnlyConjunctionOfFunctions( + const QueryTreeNodePtr & node, + const String & func_name, + const QueryTreeNodePtrWithHashSet & allowed_arguments) +{ + if (isBooleanConstant(node, true)) + return true; + + const auto * node_function = node->as(); + if (!node_function) + return false; + + if (node_function->getFunctionName() == func_name + && allowed_arguments.contains(node_function->getArgumentsNode())) + return true; + + if (node_function->getFunctionName() == "and") + { + for (const auto & and_argument : node_function->getArguments().getNodes()) + { + if (!isOnlyConjunctionOfFunctions(and_argument, func_name, allowed_arguments)) + return false; + } + return true; + } + return false; +} + +/// We can rewrite to a <=> b only if we are joining on a and b, +/// because the function is not yet implemented for other cases. +bool isTwoArgumentsFromDifferentSides(const FunctionNode & node_function, const JoinNode & join_node) +{ + const auto & argument_nodes = node_function.getArguments().getNodes(); + if (argument_nodes.size() != 2) + return false; + + auto first_src = getExpressionSource(argument_nodes[0]); + auto second_src = getExpressionSource(argument_nodes[1]); + if (!first_src || !second_src) + return false; + + const auto & lhs_join = *join_node.getLeftTableExpression(); + const auto & rhs_join = *join_node.getRightTableExpression(); + return (first_src->isEqual(lhs_join) && second_src->isEqual(rhs_join)) || + (first_src->isEqual(rhs_join) && second_src->isEqual(lhs_join)); +} + /// Visitor that optimizes logical expressions _only_ in JOIN ON section class JoinOnLogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithContext { @@ -47,15 +140,16 @@ public: { auto * function_node = node->as(); - if (!function_node) - return; + QueryTreeNodePtr new_node = nullptr; + if (function_node && function_node->getFunctionName() == "or") + new_node = tryOptimizeJoinOnNulls(function_node->getArguments().getNodes(), getContext()); + else + new_node = tryOptimizeJoinOnNulls({node}, getContext()); - if (function_node->getFunctionName() == "or") + if (new_node) { - bool is_argument_type_changed = tryOptimizeIsNotDistinctOrIsNull(node, getContext()); - if (is_argument_type_changed) - need_rerun_resolve = true; - return; + need_rerun_resolve |= !new_node->getResultType()->equals(*node->getResultType()); + node = new_node; } } @@ -72,15 +166,11 @@ private: const JoinNode * join_node; bool need_rerun_resolve = false; - /// Returns true if type of some operand is changed and parent function needs to be re-resolved - bool tryOptimizeIsNotDistinctOrIsNull(QueryTreeNodePtr & node, const ContextPtr & context) + /// Returns optimized node or nullptr if nothing have been changed + QueryTreeNodePtr tryOptimizeJoinOnNulls(const QueryTreeNodes & nodes, const ContextPtr & context) { - auto & function_node = node->as(); - chassert(function_node.getFunctionName() == "or"); - - QueryTreeNodes or_operands; - or_operands.reserve(function_node.getArguments().getNodes().size()); + or_operands.reserve(nodes.size()); /// Indices of `equals` or `isNotDistinctFrom` functions in the vector above std::vector equals_functions_indices; @@ -93,47 +183,73 @@ private: * b => [(a IS NULL AND b IS NULL)] * c => [(a IS NULL AND c IS NULL)] * } - * Then for each a <=> b we can find all operands that contains both a IS NULL and b IS NULL + * Then for each equality a = b we can check if we have operand (a IS NULL AND b IS NULL) */ QueryTreeNodePtrWithHashMap> is_null_argument_to_indices; - for (const auto & argument : function_node.getArguments()) - { - or_operands.push_back(argument); + bool is_anything_changed = false; - auto * argument_function = argument->as(); + for (const auto & node : nodes) + { + if (isBooleanConstant(node, false)) + { + /// Remove false constants from OR + is_anything_changed = true; + continue; + } + + or_operands.push_back(node); + auto * argument_function = node->as(); if (!argument_function) continue; const auto & func_name = argument_function->getFunctionName(); if (func_name == "equals" || func_name == "isNotDistinctFrom") { - const auto & argument_nodes = argument_function->getArguments().getNodes(); - if (argument_nodes.size() != 2) - continue; - /// We can rewrite to a <=> b only if we are joining on a and b, - /// because the function is not yet implemented for other cases. - auto first_src = getExpressionSource(argument_nodes[0]); - auto second_src = getExpressionSource(argument_nodes[1]); - if (!first_src || !second_src) - continue; - const auto & lhs_join = *join_node->getLeftTableExpression(); - const auto & rhs_join = *join_node->getRightTableExpression(); - bool arguments_from_both_sides = (first_src->isEqual(lhs_join) && second_src->isEqual(rhs_join)) || - (first_src->isEqual(rhs_join) && second_src->isEqual(lhs_join)); - if (!arguments_from_both_sides) - continue; - equals_functions_indices.push_back(or_operands.size() - 1); + if (isTwoArgumentsFromDifferentSides(*argument_function, *join_node)) + equals_functions_indices.push_back(or_operands.size() - 1); } else if (func_name == "and") { - for (const auto & and_argument : argument_function->getArguments().getNodes()) + const auto & and_arguments = argument_function->getArguments().getNodes(); + bool all_are_is_null = and_arguments.size() == 2 && isNodeFunction(and_arguments[0], "isNull") && isNodeFunction(and_arguments[1], "isNull"); + if (all_are_is_null) { - auto * and_argument_function = and_argument->as(); - if (and_argument_function && and_argument_function->getFunctionName() == "isNull") + is_null_argument_to_indices[getFunctionArgument(and_arguments.front(), 0)].push_back(or_operands.size() - 1); + is_null_argument_to_indices[getFunctionArgument(and_arguments.back(), 0)].push_back(or_operands.size() - 1); + } + + /// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` we can be replaced with `a = b` + /// Even though this expression are not equivalent (first is NULL on NULLs, while second is FALSE), + /// it is still correct since for JOIN ON condition NULL is treated as FALSE + if (const auto & equals_function = findEqualsFunction(and_arguments)) + { + const auto & equals_arguments = equals_function->as()->getArguments().getNodes(); + /// Expected isNotNull arguments + QueryTreeNodePtrWithHashSet allowed_arguments; + allowed_arguments.insert(QueryTreeNodePtrWithHash(std::make_shared(QueryTreeNodes{equals_arguments[0]}))); + allowed_arguments.insert(QueryTreeNodePtrWithHash(std::make_shared(QueryTreeNodes{equals_arguments[1]}))); + + bool can_be_optimized = true; + for (const auto & and_argument : and_arguments) { - const auto & is_null_argument = and_argument_function->getArguments().getNodes()[0]; - is_null_argument_to_indices[is_null_argument].push_back(or_operands.size() - 1); + if (and_argument.get() == equals_function.get()) + continue; + + if (isOnlyConjunctionOfFunctions(and_argument, "isNotNull", allowed_arguments)) + continue; + + can_be_optimized = false; + break; + } + + if (can_be_optimized) + { + is_anything_changed = true; + or_operands.pop_back(); + or_operands.push_back(equals_function); + if (isTwoArgumentsFromDifferentSides(equals_function->as(), *join_node)) + equals_functions_indices.push_back(or_operands.size() - 1); } } } @@ -144,9 +260,9 @@ private: for (size_t equals_function_idx : equals_functions_indices) { - auto * equals_function = or_operands[equals_function_idx]->as(); + const auto * equals_function = or_operands[equals_function_idx]->as(); - /// For a <=> b we are looking for expressions containing both `a IS NULL` and `b IS NULL` combined with AND + /// For a = b we are looking for all expressions `a IS NULL AND b IS NULL` const auto & argument_nodes = equals_function->getArguments().getNodes(); const auto & lhs_is_null_parents = is_null_argument_to_indices[argument_nodes[0]]; const auto & rhs_is_null_parents = is_null_argument_to_indices[argument_nodes[1]]; @@ -161,60 +277,40 @@ private: for (size_t to_optimize_idx : operands_to_optimize) { - /// We are looking for operand `a IS NULL AND b IS NULL AND ...` - auto * operand_to_optimize = or_operands[to_optimize_idx]->as(); - - /// Remove `a IS NULL` and `b IS NULL` arguments from AND - QueryTreeNodes new_arguments; - for (const auto & and_argument : operand_to_optimize->getArguments().getNodes()) - { - bool to_eliminate = false; - - const auto * and_argument_function = and_argument->as(); - if (and_argument_function && and_argument_function->getFunctionName() == "isNull") - { - const auto & is_null_argument = and_argument_function->getArguments().getNodes()[0]; - to_eliminate = (is_null_argument->isEqual(*argument_nodes[0]) || is_null_argument->isEqual(*argument_nodes[1])); - } - - if (to_eliminate) - arguments_to_reresolve.insert(to_optimize_idx); - else - new_arguments.emplace_back(and_argument); - } - /// If less than two arguments left, we will remove or replace the whole AND below - operand_to_optimize->getArguments().getNodes() = std::move(new_arguments); + /// Remove `a IS NULL AND b IS NULL` + or_operands[to_optimize_idx] = nullptr; + is_anything_changed = true; } } - if (arguments_to_reresolve.empty()) + if (arguments_to_reresolve.empty() && !is_anything_changed) /// Nothing have been changed - return false; + return nullptr; auto and_function_resolver = FunctionFactory::instance().get("and", context); auto strict_equals_function_resolver = FunctionFactory::instance().get("isNotDistinctFrom", context); - bool need_reresolve = false; QueryTreeNodes new_or_operands; for (size_t i = 0; i < or_operands.size(); ++i) { if (arguments_to_reresolve.contains(i)) { - auto * function = or_operands[i]->as(); + const auto * function = or_operands[i]->as(); if (function->getFunctionName() == "equals") { /// We should replace `a = b` with `a <=> b` because we removed checks for IS NULL - need_reresolve |= function->getResultType()->isNullable(); - function->resolveAsFunction(strict_equals_function_resolver); - new_or_operands.emplace_back(std::move(or_operands[i])); + auto new_function = or_operands[i]->clone(); + new_function->as()->resolveAsFunction(strict_equals_function_resolver); + new_or_operands.emplace_back(std::move(new_function)); } else if (function->getFunctionName() == "and") { const auto & and_arguments = function->getArguments().getNodes(); if (and_arguments.size() > 1) { - function->resolveAsFunction(and_function_resolver); - new_or_operands.emplace_back(std::move(or_operands[i])); + auto new_function = or_operands[i]->clone(); + new_function->as()->resolveAsFunction(and_function_resolver); + new_or_operands.emplace_back(std::move(new_function)); } else if (and_arguments.size() == 1) { @@ -223,25 +319,26 @@ private: } } else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function name: '{}'", function->getFunctionName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function '{}'", function->getFunctionName()); } - else + else if (or_operands[i]) { new_or_operands.emplace_back(std::move(or_operands[i])); } } + if (new_or_operands.empty()) + return nullptr; + if (new_or_operands.size() == 1) - { - node = std::move(new_or_operands[0]); - return need_reresolve; - } + return new_or_operands[0]; /// Rebuild OR function auto or_function_resolver = FunctionFactory::instance().get("or", context); - function_node.getArguments().getNodes() = std::move(new_or_operands); - function_node.resolveAsFunction(or_function_resolver); - return need_reresolve; + auto function_node = std::make_shared("or"); + function_node->getArguments().getNodes() = std::move(new_or_operands); + function_node->resolveAsFunction(or_function_resolver); + return function_node; } }; diff --git a/src/Analyzer/Passes/MultiIfToIfPass.cpp b/src/Analyzer/Passes/MultiIfToIfPass.cpp index c42ea61b34a..0b441032b02 100644 --- a/src/Analyzer/Passes/MultiIfToIfPass.cpp +++ b/src/Analyzer/Passes/MultiIfToIfPass.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index e70e08e65f4..3a8b6e75d40 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include @@ -47,25 +49,17 @@ public: if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull()) { - resolveAsCountAggregateFunction(*function_node); function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count"); } else if (function_node->getFunctionName() == "sum" && first_argument_constant_literal.getType() == Field::Types::UInt64 && first_argument_constant_literal.get() == 1) { - resolveAsCountAggregateFunction(*function_node); function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count"); } } -private: - static void resolveAsCountAggregateFunction(FunctionNode & function_node) - { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } }; } diff --git a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp index 0c37749c706..feb8bcc792d 100644 --- a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp +++ b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp @@ -5,9 +5,11 @@ #include #include #include +#include #include #include #include +#include namespace DB { diff --git a/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp b/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp index fd8c3e6ee6c..bfb6aa1dd07 100644 --- a/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp +++ b/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp b/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp index a30ad2a1590..fc989003c02 100644 --- a/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp +++ b/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index b13af7f00ae..4f0271b4914 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index 3500d8327ac..c1adf05ac76 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -6,6 +6,8 @@ #include #include +#include + #include #include @@ -74,8 +76,7 @@ public: new_arguments[1] = std::move(if_arguments_nodes[0]); function_arguments_nodes = std::move(new_arguments); - resolveAsAggregateFunctionWithIf( - *function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()}); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If"); } } else if (first_const_node) @@ -104,27 +105,10 @@ public: new_arguments[1] = std::move(not_function); function_arguments_nodes = std::move(new_arguments); - resolveAsAggregateFunctionWithIf( - *function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()}); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If"); } } } - -private: - static void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types) - { - auto result_type = function_node.getResultType(); - - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - function_node.getFunctionName() + "If", - function_node.getNullsAction(), - argument_types, - function_node.getAggregateFunction()->getParameters(), - properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } }; } diff --git a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp index 5646d26f7f6..a42b4750f47 100644 --- a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp +++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include namespace DB @@ -73,23 +74,24 @@ public: const auto lhs = std::make_shared("sum"); lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAsAggregateFunctionNode(*lhs, column_type); + resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName()); const auto rhs_count = std::make_shared("count"); rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAsAggregateFunctionNode(*rhs_count, column_type); + resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName()); const auto rhs = std::make_shared("multiply"); rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]); rhs->getArguments().getNodes().push_back(rhs_count); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext()); auto new_node = std::make_shared(Poco::toLower(func_plus_minus_node->getFunctionName())); if (column_id == 0) new_node->getArguments().getNodes() = {lhs, rhs}; else if (column_id == 1) new_node->getArguments().getNodes() = {rhs, lhs}; - resolveOrdinaryFunctionNode(*new_node, new_node->getFunctionName()); + + resolveOrdinaryFunctionNodeByName(*new_node, new_node->getFunctionName(), getContext()); if (!new_node) return; @@ -100,28 +102,7 @@ public: res = createCastFunction(res, function_node->getResultType(), getContext()); node = std::move(res); - } - -private: - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - const auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - - static void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type) - { - AggregateFunctionProperties properties; - const auto aggregate_function = AggregateFunctionFactory::instance().get(function_node.getFunctionName(), - NullsAction::EMPTY, - {argument_type}, - {}, - properties); - - function_node.resolveAsAggregateFunction(aggregate_function); - } - }; } diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 852cbe75c4a..1524629dc81 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -1,18 +1,16 @@ #include -#include -#include - #include #include - -#include - -#include - #include #include #include +#include +#include +#include +#include +#include +#include namespace DB { @@ -32,7 +30,7 @@ public: return; auto * function_node = node->as(); - if (!function_node || !function_node->isAggregateFunction()) + if (!function_node || !function_node->isAggregateFunction() || !function_node->getResultType()->equals(DataTypeUInt64())) return; auto function_name = function_node->getFunctionName(); @@ -65,7 +63,8 @@ public: auto multiplier_node = function_node_arguments_nodes[0]; function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]); function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); + + resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (constant_value_literal.get() != 1) { @@ -115,7 +114,7 @@ public: function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0]; function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); + resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (if_true_condition_value != 1) { @@ -144,7 +143,7 @@ public: function_node_arguments_nodes[0] = std::move(not_function); function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); + resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (if_false_condition_value != 1) { @@ -156,15 +155,6 @@ public: } private: - static void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type) - { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - "countIf", NullsAction::EMPTY, {argument_type}, function_node.getAggregateFunction()->getParameters(), properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } - QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right) { auto multiply_function_node = std::make_shared("multiply"); diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index 91186db0e0c..cb8ef35fc13 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -7,6 +7,9 @@ #include #include +#include + +#include namespace DB diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index b801865c9a5..3f73322d1fa 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -7,6 +7,9 @@ #include #include #include +#include + +#include namespace DB { @@ -184,11 +187,8 @@ public: /// Replace uniq of initial query to count if (match_subquery_with_distinct() || match_subquery_with_group_by()) { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties); - function_node->getArguments().getNodes().clear(); - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + resolveAggregateFunctionNodeByName(*function_node, "count"); } } }; diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index 6a5db4bc1de..a62b6e56ac5 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -40,6 +40,8 @@ #include #include +#include + #include #include @@ -940,6 +942,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select table_join.locality, result_join_strictness, result_join_kind); + join_node->setOriginalAST(table_element.table_join); /** Original AST is not set because it will contain only join part and does * not include left table expression. diff --git a/src/Analyzer/Resolve/IdentifierResolveScope.cpp b/src/Analyzer/Resolve/IdentifierResolveScope.cpp index ae363b57047..eb3e2179440 100644 --- a/src/Analyzer/Resolve/IdentifierResolveScope.cpp +++ b/src/Analyzer/Resolve/IdentifierResolveScope.cpp @@ -1,8 +1,9 @@ #include -#include #include #include +#include +#include namespace DB { diff --git a/src/Analyzer/Resolve/IdentifierResolver.cpp b/src/Analyzer/Resolve/IdentifierResolver.cpp index 692a31b66ba..9dd8aa1a05f 100644 --- a/src/Analyzer/Resolve/IdentifierResolver.cpp +++ b/src/Analyzer/Resolve/IdentifierResolver.cpp @@ -25,6 +25,8 @@ #include #include +#include + namespace DB { namespace ErrorCodes diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 576c4943ccb..5f7b06231d9 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -64,6 +64,8 @@ #include #include +#include + namespace ProfileEvents { extern const Event ScalarSubqueriesGlobalCacheHit; @@ -1490,6 +1492,10 @@ void QueryAnalyzer::qualifyColumnNodesWithProjectionNames(const QueryTreeNodes & additional_column_qualification_parts = {table_expression_node->getAlias()}; else if (auto * table_node = table_expression_node->as()) additional_column_qualification_parts = {table_node->getStorageID().getDatabaseName(), table_node->getStorageID().getTableName()}; + else if (auto * query_node = table_expression_node->as(); query_node && query_node->isCTE()) + additional_column_qualification_parts = {query_node->getCTEName()}; + else if (auto * union_node = table_expression_node->as(); union_node && union_node->isCTE()) + additional_column_qualification_parts = {union_node->getCTEName()}; size_t additional_column_qualification_parts_size = additional_column_qualification_parts.size(); const auto & table_expression_data = scope.getTableExpressionDataOrThrow(table_expression_node); @@ -4455,9 +4461,8 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table { auto left_table_expression = extractLeftTableExpression(scope_query_node->getJoinTree()); if (table_expression_node.get() == left_table_expression.get() && - scope.joins_count == 1 && - scope.context->getSettingsRef().single_join_prefer_left_table) - table_expression_data.should_qualify_columns = false; + scope.joins_count == 1 && scope.context->getSettingsRef().single_join_prefer_left_table) + table_expression_data.should_qualify_columns = false; } scope.table_expression_node_to_data.emplace(table_expression_node, std::move(table_expression_data)); diff --git a/src/Analyzer/TableNode.cpp b/src/Analyzer/TableNode.cpp index 1c4d022542d..d5a4b06f652 100644 --- a/src/Analyzer/TableNode.cpp +++ b/src/Analyzer/TableNode.cpp @@ -10,6 +10,8 @@ #include +#include + namespace DB { diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 3c3489681f6..d10bbd9bd23 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include #include @@ -636,16 +638,16 @@ private: bool has_function = false; }; -inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_node) +inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode & function_node, const String & function_name) { Array parameters; - for (const auto & param : function_node->getParameters()) + for (const auto & param : function_node.getParameters()) { auto * constant = param->as(); parameters.push_back(constant->getValue()); } - const auto & function_node_argument_nodes = function_node->getArguments().getNodes(); + const auto & function_node_argument_nodes = function_node.getArguments().getNodes(); DataTypes argument_types; argument_types.reserve(function_node_argument_nodes.size()); @@ -655,7 +657,7 @@ inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_nod AggregateFunctionProperties properties; auto action = NullsAction::EMPTY; - return AggregateFunctionFactory::instance().get(function_node->getFunctionName(), action, argument_types, parameters, properties); + return AggregateFunctionFactory::instance().get(function_name, action, argument_types, parameters, properties); } } @@ -736,11 +738,11 @@ void rerunFunctionResolve(FunctionNode * function_node, ContextPtr context) { if (name == "nothing" || name == "nothingUInt64" || name == "nothingNull") return; - function_node->resolveAsAggregateFunction(resolveAggregateFunction(function_node)); + function_node->resolveAsAggregateFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName())); } else if (function_node->isWindowFunction()) { - function_node->resolveAsWindowFunction(resolveAggregateFunction(function_node)); + function_node->resolveAsWindowFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName())); } } @@ -793,6 +795,18 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty return function_node; } +void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context) +{ + auto function = FunctionFactory::instance().get(function_name, context); + function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); +} + +void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name) +{ + auto aggregate_function = resolveAggregateFunction(function_node, function_name); + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); +} + /** Returns: * {_, false} - multiple sources * {nullptr, true} - no sources (for constants) diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index f64b724abeb..f2e2c500384 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -112,6 +112,14 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node); /// Wrap node into `_CAST` function QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context); +/// Resolves function node as ordinary function with given name. +/// Arguments and parameters are taken from the node. +void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context); + +/// Resolves function node as aggregate function with given name. +/// Arguments and parameters are taken from the node. +void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name); + /// Checks that node has only one source and returns it QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node); diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 64ab2112326..3ee7a05600e 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 44e7808babc..cee41861d70 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -29,48 +29,49 @@ namespace ErrorCodes } BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( - const StorageAzureConfiguration & configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false} - , configuration(configuration_) + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getConnectionURL(), false, false} + , connection_params(connection_params_) + , blob_path(blob_path_) { - auto client_ptr = configuration.createClient(/* is_readonly */false, /* attempt_to_create_container */true); - client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); + auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); + auto settings_ptr = AzureBlobStorage::getRequestSettingsForBackup(context_->getSettingsRef(), allow_azure_native_copy); - object_storage = std::make_unique("BackupReaderAzureBlobStorage", - std::move(client_ptr), - configuration.createSettings(context_), - configuration_.container, - configuration.getConnectionURL().toString()); + object_storage = std::make_unique( + "BackupReaderAzureBlobStorage", + std::move(client_ptr), + std::move(settings_ptr), + connection_params.getContainer(), + connection_params.getConnectionURL()); client = object_storage->getAzureBlobStorageClient(); - auto settings_copy = *object_storage->getSettings(); - settings_copy.use_native_copy = allow_azure_native_copy; - settings = std::make_unique(settings_copy); + settings = object_storage->getSettings(); } BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; bool BackupReaderAzureBlobStorage::fileExists(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupReaderAzureBlobStorage::getFileSize(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); return object_metadata.size_bytes; } std::unique_ptr BackupReaderAzureBlobStorage::readFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); @@ -85,23 +86,23 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, && destination_data_source_description.is_encrypted == encrypted_in_backup) { LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName()); - auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional &) -> size_t + auto write_blob_function = [&](const Strings & dst_blob_path, WriteMode mode, const std::optional &) -> size_t { /// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files. - if (blob_path.size() != 2 || mode != WriteMode::Rewrite) + if (dst_blob_path.size() != 2 || mode != WriteMode::Rewrite) throw Exception(ErrorCodes::LOGICAL_ERROR, "Blob writing function called with unexpected blob_path.size={} or mode={}", - blob_path.size(), mode); + dst_blob_path.size(), mode); copyAzureBlobStorageFile( client, destination_disk->getObjectStorage()->getAzureBlobStorageClient(), - configuration.container, - fs::path(configuration.blob_path) / path_in_backup, + connection_params.getContainer(), + fs::path(blob_path) / path_in_backup, 0, file_size, - /* dest_container */ blob_path[1], - /* dest_path */ blob_path[0], + /* dest_container */ dst_blob_path[1], + /* dest_path */ dst_blob_path[0], settings, read_settings, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupRDAzure")); @@ -119,28 +120,33 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( - const StorageAzureConfiguration & configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false} - , configuration(configuration_) + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getConnectionURL(), false, false} + , connection_params(connection_params_) + , blob_path(blob_path_) { - auto client_ptr = configuration.createClient(/* is_readonly */false, attempt_to_create_container); - client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); + if (!attempt_to_create_container) + connection_params.endpoint.container_already_exists = true; + + auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); + auto settings_ptr = AzureBlobStorage::getRequestSettingsForBackup(context_->getSettingsRef(), allow_azure_native_copy); + + object_storage = std::make_unique( + "BackupWriterAzureBlobStorage", + std::move(client_ptr), + std::move(settings_ptr), + connection_params.getContainer(), + connection_params.getConnectionURL()); - object_storage = std::make_unique("BackupWriterAzureBlobStorage", - std::move(client_ptr), - configuration.createSettings(context_), - configuration.container, - configuration_.getConnectionURL().toString()); client = object_storage->getAzureBlobStorageClient(); - auto settings_copy = *object_storage->getSettings(); - settings_copy.use_native_copy = allow_azure_native_copy; - settings = std::make_unique(settings_copy); + settings = object_storage->getSettings(); } void BackupWriterAzureBlobStorage::copyFileFromDisk( @@ -159,18 +165,18 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk( { /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage container. /// In this case we can't use the native copy. - if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2) + if (auto src_blob_path = src_disk->getBlobPath(src_path); src_blob_path.size() == 2) { LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorag", src_path, src_disk->getName()); copyAzureBlobStorageFile( src_disk->getObjectStorage()->getAzureBlobStorageClient(), client, - /* src_container */ blob_path[1], - /* src_path */ blob_path[0], + /* src_container */ src_blob_path[1], + /* src_path */ src_blob_path[0], start_pos, length, - configuration.container, - fs::path(configuration.blob_path) / path_in_backup, + connection_params.getContainer(), + fs::path(blob_path) / path_in_backup, settings, read_settings, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); @@ -188,11 +194,11 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St copyAzureBlobStorageFile( client, client, - configuration.container, - fs::path(configuration.blob_path)/ source, + connection_params.getContainer(), + fs::path(blob_path)/ source, 0, size, - /* dest_container */ configuration.container, + /* dest_container */ connection_params.getContainer(), /* dest_path */ destination, settings, read_settings, @@ -206,22 +212,28 @@ void BackupWriterAzureBlobStorage::copyDataToFile( UInt64 length) { copyDataToAzureBlobStorageFile( - create_read_buffer, start_pos, length, client, configuration.container, - fs::path(configuration.blob_path) / path_in_backup, settings, - threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); + create_read_buffer, + start_pos, + length, + client, + connection_params.getContainer(), + fs::path(blob_path) / path_in_backup, + settings, + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), + "BackupWRAzure")); } BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; bool BackupWriterAzureBlobStorage::fileExists(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; RelativePathsWithMetadata children; object_storage->listObjects(key,children,/*max_keys*/0); if (children.empty()) @@ -231,7 +243,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); @@ -239,7 +251,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, @@ -251,7 +263,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const Strin void BackupWriterAzureBlobStorage::removeFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; StoredObject object(key); object_storage->removeObjectIfExists(object); } @@ -260,7 +272,7 @@ void BackupWriterAzureBlobStorage::removeFiles(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(fs::path(configuration.blob_path) / file_name); + objects.emplace_back(fs::path(blob_path) / file_name); object_storage->removeObjectsIfExist(objects); @@ -270,7 +282,7 @@ void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(fs::path(configuration.blob_path) / file_name); + objects.emplace_back(fs::path(blob_path) / file_name); object_storage->removeObjectsIfExist(objects); } diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 61688107839..c3b88f245ab 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -1,12 +1,10 @@ #pragma once - #include "config.h" #if USE_AZURE_BLOB_STORAGE #include #include -#include -#include +#include namespace DB @@ -17,7 +15,8 @@ class BackupReaderAzureBlobStorage : public BackupReaderDefault { public: BackupReaderAzureBlobStorage( - const StorageAzureConfiguration & configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, @@ -40,16 +39,18 @@ public: private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureConfiguration configuration; + AzureBlobStorage::ConnectionParams connection_params; + String blob_path; std::unique_ptr object_storage; - std::shared_ptr settings; + std::shared_ptr settings; }; class BackupWriterAzureBlobStorage : public BackupWriterDefault { public: BackupWriterAzureBlobStorage( - const StorageAzureConfiguration & configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, @@ -87,9 +88,10 @@ private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureConfiguration configuration; + AzureBlobStorage::ConnectionParams connection_params; + String blob_path; std::unique_ptr object_storage; - std::shared_ptr settings; + std::shared_ptr settings; }; } diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 56544312c26..aef37021227 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -1,6 +1,7 @@ #include #if USE_AWS_S3 +#include #include #include #include diff --git a/src/Backups/BackupOperationInfo.h b/src/Backups/BackupOperationInfo.h index 21b5284458c..71589ec3b30 100644 --- a/src/Backups/BackupOperationInfo.h +++ b/src/Backups/BackupOperationInfo.h @@ -3,6 +3,8 @@ #include #include +#include + namespace DB { diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 69d9c52ebd9..15a7d7c1eca 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include diff --git a/src/Backups/DDLAdjustingForBackupVisitor.cpp b/src/Backups/DDLAdjustingForBackupVisitor.cpp index 7e5ce91629b..910831195a3 100644 --- a/src/Backups/DDLAdjustingForBackupVisitor.cpp +++ b/src/Backups/DDLAdjustingForBackupVisitor.cpp @@ -1,11 +1,11 @@ #include +#include +#include #include #include #include -#include -#include - #include +#include namespace DB diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 454a0468e9f..3056f9fe421 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -23,8 +23,9 @@ #include #include #include -#include +#include +#include #include #include diff --git a/src/Backups/WithRetries.cpp b/src/Backups/WithRetries.cpp index 66851fa42ce..181e6331ac9 100644 --- a/src/Backups/WithRetries.cpp +++ b/src/Backups/WithRetries.cpp @@ -1,5 +1,7 @@ -#include #include +#include + +#include namespace DB { diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 03d156d1009..626df99b00c 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -5,6 +5,7 @@ #if USE_AZURE_BLOB_STORAGE #include +#include #include #include #include @@ -49,7 +50,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) const String & id_arg = params.backup_info.id_arg; const auto & args = params.backup_info.args; - StorageAzureConfiguration configuration; + String blob_path; + AzureBlobStorage::ConnectionParams connection_params; + auto request_settings = AzureBlobStorage::getRequestSettings(params.context->getSettingsRef()); if (!id_arg.empty()) { @@ -59,55 +62,42 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (!config.has(config_prefix)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg); - if (config.has(config_prefix + ".connection_string")) + connection_params = { - configuration.connection_url = config.getString(config_prefix + ".connection_string"); - configuration.is_connection_string = true; - configuration.container = config.getString(config_prefix + ".container"); - } - else - { - configuration.connection_url = config.getString(config_prefix + ".storage_account_url"); - configuration.is_connection_string = false; - configuration.container = config.getString(config_prefix + ".container"); - configuration.account_name = config.getString(config_prefix + ".account_name"); - configuration.account_key = config.getString(config_prefix + ".account_key"); - - if (config.has(config_prefix + ".account_name") && config.has(config_prefix + ".account_key")) - { - configuration.account_name = config.getString(config_prefix + ".account_name"); - configuration.account_key = config.getString(config_prefix + ".account_key"); - } - } + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true), + }; if (args.size() > 1) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]"); if (args.size() == 1) - configuration.setPath(args[0].safeGet()); - + blob_path = args[0].safeGet(); } else { if (args.size() == 3) { - configuration.connection_url = args[0].safeGet(); - configuration.is_connection_string = !configuration.connection_url.starts_with("http"); + auto connection_url = args[0].safeGet(); + auto container_name = args[1].safeGet(); + blob_path = args[2].safeGet(); - configuration.container = args[1].safeGet(); - configuration.blob_path = args[2].safeGet(); + AzureBlobStorage::processURL(connection_url, container_name, connection_params.endpoint, connection_params.auth_method); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true); } else if (args.size() == 5) { - configuration.connection_url = args[0].safeGet(); - configuration.is_connection_string = false; + connection_params.endpoint.storage_account_url = args[0].safeGet(); + connection_params.endpoint.container_name = args[1].safeGet(); + blob_path = args[2].safeGet(); - configuration.container = args[1].safeGet(); - configuration.blob_path = args[2].safeGet(); - configuration.account_name = args[3].safeGet(); - configuration.account_key = args[4].safeGet(); + auto account_name = args[3].safeGet(); + auto account_key = args[4].safeGet(); + connection_params.auth_method = std::make_shared(account_name, account_key); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true); } else { @@ -117,16 +107,12 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) } BackupImpl::ArchiveParams archive_params; - if (hasRegisteredArchiveFileExtension(configuration.getPath())) + if (hasRegisteredArchiveFileExtension(blob_path)) { if (params.is_internal_backup) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled"); - auto path = configuration.getPath(); - auto filename = removeFileNameFromURL(path); - configuration.setPath(path); - - archive_params.archive_name = filename; + archive_params.archive_name = removeFileNameFromURL(blob_path); archive_params.compression_method = params.compression_method; archive_params.compression_level = params.compression_level; archive_params.password = params.password; @@ -141,7 +127,8 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (params.open_mode == IBackup::OpenMode::READ) { auto reader = std::make_shared( - configuration, + connection_params, + blob_path, params.allow_azure_native_copy, params.read_settings, params.write_settings, @@ -159,7 +146,8 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) else { auto writer = std::make_shared( - configuration, + connection_params, + blob_path, params.allow_azure_native_copy, params.read_settings, params.write_settings, diff --git a/src/Bridge/IBridge.cpp b/src/Bridge/IBridge.cpp index c25d7bd2fed..de48a4f2b84 100644 --- a/src/Bridge/IBridge.cpp +++ b/src/Bridge/IBridge.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/src/BridgeHelper/LibraryBridgeHelper.cpp b/src/BridgeHelper/LibraryBridgeHelper.cpp index 84bfe096e79..12ca5564e80 100644 --- a/src/BridgeHelper/LibraryBridgeHelper.cpp +++ b/src/BridgeHelper/LibraryBridgeHelper.cpp @@ -1,5 +1,7 @@ #include "LibraryBridgeHelper.h" +#include +#include #include namespace DB diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b18207e55ad..d985595154c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -607,6 +607,10 @@ if (TARGET ch_contrib::usearch) dbms_target_link_libraries(PUBLIC ch_contrib::usearch) endif() +if (TARGET ch_contrib::prometheus_protobufs) + dbms_target_link_libraries (PUBLIC ch_contrib::prometheus_protobufs) +endif() + if (TARGET ch_rust::skim) dbms_target_include_directories(PRIVATE $) dbms_target_link_libraries(PUBLIC ch_rust::skim) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 5ed4cd7f7ce..42b5a1332d5 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1206,11 +1206,8 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b if (local_format_error) std::rethrow_exception(local_format_error); - if (cancelled && is_interactive) - { + if (cancelled && is_interactive && !cancelled_printed.exchange(true)) output_stream << "Query was cancelled." << std::endl; - cancelled_printed = true; - } } @@ -1326,7 +1323,7 @@ void ClientBase::onEndOfStream() if (is_interactive) { - if (cancelled && !cancelled_printed) + if (cancelled && !cancelled_printed.exchange(true)) output_stream << "Query was cancelled." << std::endl; else if (!written_first_block) output_stream << "Ok." << std::endl; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 756400137ad..d2b10d7ed2e 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -6,13 +6,14 @@ #include #include #include +#include #include #include #include +#include #include #include #include -#include #include #include #include @@ -338,8 +339,8 @@ protected: bool allow_repeated_settings = false; bool allow_merge_tree_settings = false; - bool cancelled = false; - bool cancelled_printed = false; + std::atomic_bool cancelled = false; + std::atomic_bool cancelled_printed = false; /// Unpacked descriptors and streams for the ease of use. int in_fd = STDIN_FILENO; diff --git a/src/Client/ConnectionEstablisher.cpp b/src/Client/ConnectionEstablisher.cpp index 303105751ad..4bab643ed31 100644 --- a/src/Client/ConnectionEstablisher.cpp +++ b/src/Client/ConnectionEstablisher.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace ProfileEvents { @@ -8,6 +9,7 @@ namespace ProfileEvents extern const Event DistributedConnectionUsable; extern const Event DistributedConnectionMissingTable; extern const Event DistributedConnectionStaleReplica; + extern const Event DistributedConnectionFailTry; } namespace DB @@ -97,6 +99,8 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std:: } catch (const Exception & e) { + ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); + if (e.code() != ErrorCodes::NETWORK_ERROR && e.code() != ErrorCodes::SOCKET_TIMEOUT && e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF && e.code() != ErrorCodes::DNS_ERROR) throw; diff --git a/src/Client/ConnectionPool.cpp b/src/Client/ConnectionPool.cpp index 5cabb1465d1..ed2e7c3c725 100644 --- a/src/Client/ConnectionPool.cpp +++ b/src/Client/ConnectionPool.cpp @@ -1,4 +1,5 @@ #include +#include #include @@ -85,4 +86,15 @@ ConnectionPoolFactory & ConnectionPoolFactory::instance() return ret; } +IConnectionPool::Entry ConnectionPool::get(const DB::ConnectionTimeouts& timeouts, const DB::Settings& settings, + bool force_connected) +{ + Entry entry = Base::get(settings.connection_pool_max_wait_ms.totalMilliseconds()); + + if (force_connected) + entry->forceConnected(timeouts); + + return entry; +} + } diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index 725a5e91ac0..d83ecdd75f9 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -4,12 +4,13 @@ #include #include #include -#include #include namespace DB { +struct Settings; + /** Interface for connection pools. * * Usage (using the usual `ConnectionPool` example) @@ -102,15 +103,7 @@ public: Entry get(const ConnectionTimeouts & timeouts, /// NOLINT const Settings & settings, - bool force_connected = true) override - { - Entry entry = Base::get(settings.connection_pool_max_wait_ms.totalMilliseconds()); - - if (force_connected) - entry->forceConnected(timeouts); - - return entry; - } + bool force_connected = true) override; std::string getDescription() const { diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 8c993f906e0..dd8348ea04f 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -187,15 +188,22 @@ void HedgedConnections::sendQuery( modified_settings.group_by_two_level_threshold_bytes = 0; } - const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; + const bool enable_offset_parallel_processing = context->canUseOffsetParallelReplicas(); - if (offset_states.size() > 1 && enable_sample_offset_parallel_processing) + if (offset_states.size() > 1 && enable_offset_parallel_processing) { modified_settings.parallel_replicas_count = offset_states.size(); modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset; } - replica.connection->sendQuery(timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {}); + /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting. + /// Make the analyzer being set, so it will be effectively applied on the remote server. + /// In other words, the initiator always controls whether the analyzer enabled or not for + /// all servers involved in the distributed query processing. + modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); + + replica.connection->sendQuery( + timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {}); replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout); replica.packet_receiver->setTimeout(hedged_connections_factory.getConnectionTimeouts().receive_timeout); }; diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index 0fa2bc12924..be7397b0fad 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -7,7 +7,6 @@ namespace ProfileEvents { extern const Event HedgedRequestsChangeReplica; - extern const Event DistributedConnectionFailTry; extern const Event DistributedConnectionFailAtAll; } @@ -327,7 +326,6 @@ HedgedConnectionsFactory::State HedgedConnectionsFactory::processFinishedConnect { ShuffledPool & shuffled_pool = shuffled_pools[index]; LOG_INFO(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message); - ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); shuffled_pool.error_count = std::min(pool->getMaxErrorCup(), shuffled_pool.error_count + 1); shuffled_pool.slowdown_count = 0; diff --git a/src/Client/HedgedConnectionsFactory.h b/src/Client/HedgedConnectionsFactory.h index 51da85ea178..63e39819f58 100644 --- a/src/Client/HedgedConnectionsFactory.h +++ b/src/Client/HedgedConnectionsFactory.h @@ -8,13 +8,14 @@ #include #include #include -#include #include #include namespace DB { +struct Settings; + /** Class for establishing hedged connections with replicas. * The process of establishing connection is divided on stages, on each stage if * replica doesn't respond for a long time, we start establishing connection with diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 5d0fc8fd39e..207bf6c9e07 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include #include @@ -23,8 +25,8 @@ namespace ErrorCodes } -MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler) - : settings(settings_) +MultiplexedConnections::MultiplexedConnections(Connection & connection, ContextPtr context_, const ThrottlerPtr & throttler) + : context(std::move(context_)), settings(context->getSettingsRef()) { connection.setThrottler(throttler); @@ -36,9 +38,9 @@ MultiplexedConnections::MultiplexedConnections(Connection & connection, const Se } -MultiplexedConnections::MultiplexedConnections(std::shared_ptr connection_ptr_, const Settings & settings_, const ThrottlerPtr & throttler) - : settings(settings_) - , connection_ptr(connection_ptr_) +MultiplexedConnections::MultiplexedConnections( + std::shared_ptr connection_ptr_, ContextPtr context_, const ThrottlerPtr & throttler) + : context(std::move(context_)), settings(context->getSettingsRef()), connection_ptr(connection_ptr_) { connection_ptr->setThrottler(throttler); @@ -50,9 +52,8 @@ MultiplexedConnections::MultiplexedConnections(std::shared_ptr conne } MultiplexedConnections::MultiplexedConnections( - std::vector && connections, - const Settings & settings_, const ThrottlerPtr & throttler) - : settings(settings_) + std::vector && connections, ContextPtr context_, const ThrottlerPtr & throttler) + : context(std::move(context_)), settings(context->getSettingsRef()) { /// If we didn't get any connections from pool and getMany() did not throw exceptions, this means that /// `skip_unavailable_shards` was set. Then just return. @@ -150,18 +151,24 @@ void MultiplexedConnections::sendQuery( } } - const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; + /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting. + /// Make the analyzer being set, so it will be effectively applied on the remote server. + /// In other words, the initiator always controls whether the analyzer enabled or not for + /// all servers involved in the distributed query processing. + modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); + + const bool enable_offset_parallel_processing = context->canUseOffsetParallelReplicas(); size_t num_replicas = replica_states.size(); if (num_replicas > 1) { - if (enable_sample_offset_parallel_processing) + if (enable_offset_parallel_processing) /// Use multiple replicas for parallel query processing. modified_settings.parallel_replicas_count = num_replicas; for (size_t i = 0; i < num_replicas; ++i) { - if (enable_sample_offset_parallel_processing) + if (enable_offset_parallel_processing) modified_settings.parallel_replica_offset = i; replica_states[i].connection->sendQuery( diff --git a/src/Client/MultiplexedConnections.h b/src/Client/MultiplexedConnections.h index 9f7b47e0562..dec32e52d4f 100644 --- a/src/Client/MultiplexedConnections.h +++ b/src/Client/MultiplexedConnections.h @@ -10,7 +10,6 @@ namespace DB { - /** To retrieve data directly from multiple replicas (connections) from one shard * within a single thread. As a degenerate case, it can also work with one connection. * It is assumed that all functions except sendCancel are always executed in one thread. @@ -21,14 +20,12 @@ class MultiplexedConnections final : public IConnections { public: /// Accepts ready connection. - MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler_); + MultiplexedConnections(Connection & connection, ContextPtr context_, const ThrottlerPtr & throttler_); /// Accepts ready connection and keep it alive before drain - MultiplexedConnections(std::shared_ptr connection_, const Settings & settings_, const ThrottlerPtr & throttler_); + MultiplexedConnections(std::shared_ptr connection_, ContextPtr context_, const ThrottlerPtr & throttler_); /// Accepts a vector of connections to replicas of one shard already taken from pool. - MultiplexedConnections( - std::vector && connections, - const Settings & settings_, const ThrottlerPtr & throttler_); + MultiplexedConnections(std::vector && connections, ContextPtr context_, const ThrottlerPtr & throttler_); void sendScalarsData(Scalars & data) override; void sendExternalTablesData(std::vector & data) override; @@ -86,6 +83,7 @@ private: /// Mark the replica as invalid. void invalidateReplica(ReplicaState & replica_state); + ContextPtr context; const Settings & settings; /// The current number of valid connections to the replicas of this shard. diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index f7e6b1a1ccc..cfd07c27765 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -267,7 +267,11 @@ bool ColumnAggregateFunction::structureEquals(const IColumn & to) const } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length) +#else +void ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length) +#endif { const ColumnAggregateFunction & from_concrete = assert_cast(from); @@ -462,7 +466,11 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size insertMergeFrom(from, n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n) +#else +void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n) +#endif { insertRangeFrom(from, n, 1); } diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index a75b27e835c..1be7a862438 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -145,7 +145,14 @@ public: void insertData(const char * pos, size_t length) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & from, size_t n) override; +#else + using IColumn::insertFrom; + + void doInsertFrom(const IColumn & from, size_t n) override; +#endif + void insertFrom(ConstAggregateDataPtr place); @@ -182,7 +189,11 @@ public: void protect() override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & from, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & from, size_t start, size_t length) override; +#endif void popBack(size_t n) override; @@ -201,7 +212,11 @@ public: MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn &, int) const override +#else + int doCompareAt(size_t, size_t, const IColumn &, int) const override +#endif { return 0; } diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 0b7e6541560..5d7350f3a79 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -337,7 +337,11 @@ bool ColumnArray::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnArray::insertFrom(const IColumn & src_, size_t n) +#else +void ColumnArray::doInsertFrom(const IColumn & src_, size_t n) +#endif { const ColumnArray & src = assert_cast(src_); size_t size = src.sizeAt(n); @@ -392,7 +396,11 @@ int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan : 1); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const +#else +int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const +#endif { return compareAtImpl(n, m, rhs_, nan_direction_hint); } @@ -535,7 +543,11 @@ void ColumnArray::getExtremes(Field & min, Field & max) const } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { if (length == 0) return; diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 53eb5166df8..6cd3e2f6c3b 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -84,10 +84,18 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; +#else + void doInsertFrom(const IColumn & src_, size_t n) override; +#endif void insertDefault() override; void popBack(size_t n) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; @@ -95,7 +103,11 @@ public: ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#endif int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override; void getPermutation(PermutationSortDirection direction, PermutationSortStability stability, size_t limit, int nan_direction_hint, Permutation & res) const override; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index 934adf07cf4..5e455709fec 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -85,7 +85,11 @@ public: bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); } void insert(const Field &) override { throwMustBeDecompressed(); } bool tryInsert(const Field &) override { throwMustBeDecompressed(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } +#else + void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } +#endif void insertData(const char *, size_t) override { throwMustBeDecompressed(); } void insertDefault() override { throwMustBeDecompressed(); } void popBack(size_t) override { throwMustBeDecompressed(); } @@ -100,7 +104,11 @@ public: void expand(const Filter &, bool) override { throwMustBeDecompressed(); } ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); } ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } +#else + int doCompareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } +#endif void compareColumn(const IColumn &, size_t, PaddedPODArray *, PaddedPODArray &, int, int) const override { throwMustBeDecompressed(); diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index c2c0fa3027c..b55a1f42037 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -32,6 +32,8 @@ private: ColumnConst(const ColumnConst & src) = default; public: + bool isConst() const override { return true; } + ColumnPtr convertToFullColumn() const; ColumnPtr convertToFullColumnIfConst() const override @@ -121,7 +123,11 @@ public: return data->isNullAt(0); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override +#else + void doInsertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override +#endif { s += length; } @@ -145,12 +151,20 @@ public: ++s; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn &, size_t) override +#else + void doInsertFrom(const IColumn &, size_t) override +#endif { ++s; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } +#else + void doInsertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } +#endif void insertDefault() override { @@ -223,7 +237,11 @@ public: return data->allocatedBytes() + sizeof(s); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override +#else + int doCompareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override +#endif { return data->compareAt(0, 0, *assert_cast(rhs).data, nan_direction_hint); } diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index eb9784c14dd..cf413f790a7 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -32,7 +32,11 @@ namespace ErrorCodes } template +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnDecimal::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const +#else +int ColumnDecimal::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int) const +#endif { auto & other = static_cast(rhs_); const T & a = data[n]; @@ -331,7 +335,11 @@ void ColumnDecimal::insertData(const char * src, size_t /*length*/) } template +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnDecimal::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnDecimal::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnDecimal & src_vec = assert_cast(src); diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index c4510ba2922..32efeb643a6 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -55,9 +55,17 @@ public: void reserve(size_t n) override { data.reserve_exact(n); } void shrinkToFit() override { data.shrink_to_fit(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } +#else + void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } +#endif +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override +#else + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override +#endif { ValueType v = assert_cast(src).getData()[position]; data.resize_fill(data.size() + length, v); @@ -68,7 +76,11 @@ public: void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); } void insert(const Field & x) override { data.push_back(x.get()); } bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void popBack(size_t n) override { @@ -92,7 +104,11 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#endif void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 3c147b6f123..c735238f515 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -4,7 +4,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -213,7 +215,11 @@ bool ColumnDynamic::tryInsert(const DB::Field & x) } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) +#else +void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) +#endif { const auto & dynamic_src = assert_cast(src_); @@ -263,7 +269,11 @@ void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) +#else +void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) +#endif { if (start + length > src_.size()) throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameter out of bound in ColumnDynamic::insertRangeFrom method. " @@ -429,7 +439,11 @@ void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size } } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#else +void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#endif { const auto & dynamic_src = assert_cast(src_); @@ -481,7 +495,7 @@ StringRef ColumnDynamic::serializeValueIntoArena(size_t n, DB::Arena & arena, co /// We cannot use Variant serialization here as it serializes discriminator + value, /// but Dynamic doesn't have fixed mapping discriminator <-> variant type /// as different Dynamic column can have different Variants. - /// Instead, we serialize null bit + variant type name (size + bytes) + value. + /// Instead, we serialize null bit + variant type in binary format (size + bytes) + value. const auto & variant_col = assert_cast(*variant_column); auto discr = variant_col.globalDiscriminatorAt(n); StringRef res; @@ -495,14 +509,15 @@ StringRef ColumnDynamic::serializeValueIntoArena(size_t n, DB::Arena & arena, co return res; } - const auto & variant_name = variant_info.variant_names[discr]; - size_t variant_name_size = variant_name.size(); - char * pos = arena.allocContinue(sizeof(UInt8) + sizeof(size_t) + variant_name.size(), begin); + const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(discr); + String variant_type_binary_data = encodeDataType(variant_type); + size_t variant_type_binary_data_size = variant_type_binary_data.size(); + char * pos = arena.allocContinue(sizeof(UInt8) + sizeof(size_t) + variant_type_binary_data.size(), begin); memcpy(pos, &null_bit, sizeof(UInt8)); - memcpy(pos + sizeof(UInt8), &variant_name_size, sizeof(size_t)); - memcpy(pos + sizeof(UInt8) + sizeof(size_t), variant_name.data(), variant_name.size()); + memcpy(pos + sizeof(UInt8), &variant_type_binary_data_size, sizeof(size_t)); + memcpy(pos + sizeof(UInt8) + sizeof(size_t), variant_type_binary_data.data(), variant_type_binary_data.size()); res.data = pos; - res.size = sizeof(UInt8) + sizeof(size_t) + variant_name.size(); + res.size = sizeof(UInt8) + sizeof(size_t) + variant_type_binary_data.size(); auto value_ref = variant_col.getVariantByGlobalDiscriminator(discr).serializeValueIntoArena(variant_col.offsetAt(n), arena, begin); res.data = value_ref.data - res.size; @@ -521,13 +536,15 @@ const char * ColumnDynamic::deserializeAndInsertFromArena(const char * pos) return pos; } - /// Read variant type name. - const size_t variant_name_size = unalignedLoad(pos); - pos += sizeof(variant_name_size); - String variant_name; - variant_name.resize(variant_name_size); - memcpy(variant_name.data(), pos, variant_name_size); - pos += variant_name_size; + /// Read variant type in binary format. + const size_t variant_type_binary_data_size = unalignedLoad(pos); + pos += sizeof(variant_type_binary_data_size); + String variant_type_binary_data; + variant_type_binary_data.resize(variant_type_binary_data_size); + memcpy(variant_type_binary_data.data(), pos, variant_type_binary_data_size); + pos += variant_type_binary_data_size; + auto variant_type = decodeDataType(variant_type_binary_data); + auto variant_name = variant_type->getName(); /// If we already have such variant, just deserialize it into corresponding variant column. auto it = variant_info.variant_name_to_discriminator.find(variant_name); if (it != variant_info.variant_name_to_discriminator.end()) @@ -537,7 +554,6 @@ const char * ColumnDynamic::deserializeAndInsertFromArena(const char * pos) } /// If we don't have such variant, add it. - auto variant_type = DataTypeFactory::instance().get(variant_name); if (likely(addNewVariant(variant_type))) { auto discr = variant_info.variant_name_to_discriminator[variant_name]; @@ -563,13 +579,13 @@ const char * ColumnDynamic::skipSerializedInArena(const char * pos) const if (null_bit) return pos; - const size_t variant_name_size = unalignedLoad(pos); - pos += sizeof(variant_name_size); - String variant_name; - variant_name.resize(variant_name_size); - memcpy(variant_name.data(), pos, variant_name_size); - pos += variant_name_size; - auto tmp_variant_column = DataTypeFactory::instance().get(variant_name)->createColumn(); + const size_t variant_type_binary_data_size = unalignedLoad(pos); + pos += sizeof(variant_type_binary_data_size); + String variant_type_binary_data; + variant_type_binary_data.resize(variant_type_binary_data_size); + memcpy(variant_type_binary_data.data(), pos, variant_type_binary_data_size); + pos += variant_type_binary_data_size; + auto tmp_variant_column = decodeDataType(variant_type_binary_data)->createColumn(); return tmp_variant_column->skipSerializedInArena(pos); } @@ -587,7 +603,11 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +#else +int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +#endif { const auto & left_variant = assert_cast(*variant_column); const auto & right_dynamic = assert_cast(rhs); diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 27ad0dd583f..9abddc7a26d 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -142,9 +142,16 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertDefault() override { @@ -213,7 +220,11 @@ public: return scattered_columns; } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif bool hasEqualValues() const override { diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index d7e4eff2727..1c2de203a94 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -74,7 +74,11 @@ bool ColumnFixedString::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) +#else +void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index) +#endif { const ColumnFixedString & src = assert_cast(src_); @@ -86,7 +90,11 @@ void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void ColumnFixedString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnFixedString & src_concrete = assert_cast(src); if (n != src_concrete.getN()) @@ -219,7 +227,11 @@ size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation & return elements.size(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnFixedString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnFixedString & src_concrete = assert_cast(src); chassert(this->n == src_concrete.n); diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 7b46dc11cd6..6e88136fc50 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -98,9 +98,17 @@ public: bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t index) override; +#else + void doInsertFrom(const IColumn & src_, size_t index) override; +#endif +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertData(const char * pos, size_t length) override; @@ -129,7 +137,11 @@ public: void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override +#else + int doCompareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override +#endif { const ColumnFixedString & rhs = assert_cast(rhs_); chassert(this->n == rhs.n); @@ -144,7 +156,11 @@ public: size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp index 0ab9d15ad50..fa57f35a823 100644 --- a/src/Columns/ColumnFunction.cpp +++ b/src/Columns/ColumnFunction.cpp @@ -72,7 +72,11 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnFunction::insertFrom(const IColumn & src, size_t n) +#else +void ColumnFunction::doInsertFrom(const IColumn & src, size_t n) +#endif { const ColumnFunction & src_func = assert_cast(src); @@ -89,7 +93,11 @@ void ColumnFunction::insertFrom(const IColumn & src, size_t n) ++elements_size; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnFunction::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnFunction & src_func = assert_cast(src); diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 6fdc6679d3e..ba924c49a82 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -94,8 +94,16 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; +#endif +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn &, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn &, size_t start, size_t length) override; +#endif void insertData(const char *, size_t) override { @@ -137,7 +145,11 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "popBack is not implemented for {}", getName()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn &, int) const override +#else + int doCompareAt(size_t, size_t, const IColumn &, int) const override +#endif { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "compareAt is not implemented for {}", getName()); } diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 208326fe629..eb694a10b0f 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -159,7 +159,11 @@ void ColumnLowCardinality::insertDefault() idx.insertPosition(getDictionary().getDefaultValueIndex()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n) +#else +void ColumnLowCardinality::doInsertFrom(const IColumn & src, size_t n) +#endif { const auto * low_cardinality_src = typeid_cast(&src); @@ -187,7 +191,11 @@ void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n) idx.insertPosition(getDictionary().uniqueInsertFrom(src, n)); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnLowCardinality::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const auto * low_cardinality_src = typeid_cast(&src); @@ -364,7 +372,11 @@ int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else +int ColumnLowCardinality::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { return compareAtImpl(n, m, rhs, nan_direction_hint); } diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index ac3b725b22f..e99be07cd8d 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -78,10 +78,18 @@ public: bool tryInsert(const Field & x) override; void insertDefault() override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; +#endif void insertFromFullColumn(const IColumn & src, size_t n); +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length); void insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions); @@ -127,7 +135,11 @@ public: return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit)); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override; diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index eecea1a273f..2dffddb2dc9 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -153,17 +153,29 @@ void ColumnMap::updateHashFast(SipHash & hash) const nested->updateHashFast(hash); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnMap::insertFrom(const IColumn & src, size_t n) +#else +void ColumnMap::doInsertFrom(const IColumn & src, size_t n) +#endif { nested->insertFrom(assert_cast(src).getNestedColumn(), n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { assert_cast(*nested).insertManyFrom(assert_cast(src).getNestedColumn(), position, length); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnMap::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { nested->insertRangeFrom( assert_cast(src).getNestedColumn(), @@ -210,7 +222,11 @@ MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector & sele return res; } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else +int ColumnMap::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { const auto & rhs_map = assert_cast(rhs); return nested->compareAt(n, m, rhs_map.getNestedColumn(), nan_direction_hint); diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 52165d0d74e..a54071a2974 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -66,16 +66,28 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif + ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif void getExtremes(Field & min, Field & max) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 1d12a59fd59..f060e74b315 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -221,7 +221,11 @@ const char * ColumnNullable::skipSerializedInArena(const char * pos) const return pos; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnNullable::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnNullable & nullable_col = assert_cast(src); getNullMapColumn().insertRangeFrom(*nullable_col.null_map, start, length); @@ -258,7 +262,11 @@ bool ColumnNullable::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnNullable::insertFrom(const IColumn & src, size_t n) +#else +void ColumnNullable::doInsertFrom(const IColumn & src, size_t n) +#endif { const ColumnNullable & src_concrete = assert_cast(src); getNestedColumn().insertFrom(src_concrete.getNestedColumn(), n); @@ -266,7 +274,11 @@ void ColumnNullable::insertFrom(const IColumn & src, size_t n) } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void ColumnNullable::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnNullable & src_concrete = assert_cast(src); getNestedColumn().insertManyFrom(src_concrete.getNestedColumn(), position, length); @@ -402,7 +414,11 @@ int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#else +int ColumnNullable::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#endif { return compareAtImpl(n, m, rhs_, null_direction_hint); } diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 510a4cacf1e..a6d0483e527 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -69,11 +69,21 @@ public: char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertFromNotNullable(const IColumn & src, size_t n); void insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length); @@ -90,7 +100,11 @@ public: void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#endif #if USE_EMBEDDED_COMPILER diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 90ef974010c..adcd42b16e9 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -763,12 +763,20 @@ void ColumnObject::get(size_t n, Field & res) const } } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnObject::insertFrom(const IColumn & src, size_t n) +#else +void ColumnObject::doInsertFrom(const IColumn & src, size_t n) +#endif { insert(src[n]); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const auto & src_object = assert_cast(src); diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index e2936b27994..fadf2e18779 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -209,8 +209,15 @@ public: void insert(const Field & field) override; bool tryInsert(const Field & field) override; void insertDefault() override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif + void popBack(size_t length) override; Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; @@ -228,7 +235,11 @@ public: /// Order of rows in ColumnObject is undefined. void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override; void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {} +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#else + int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#endif void getExtremes(Field & min, Field & max) const override; /// All other methods throw exception. diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 5190ceb49e5..809586d8810 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -174,7 +174,11 @@ const char * ColumnSparse::skipSerializedInArena(const char * pos) const return values->skipSerializedInArena(pos); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnSparse::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { if (length == 0) return; @@ -248,7 +252,11 @@ bool ColumnSparse::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnSparse::insertFrom(const IColumn & src, size_t n) +#else +void ColumnSparse::doInsertFrom(const IColumn & src, size_t n) +#endif { if (const auto * src_sparse = typeid_cast(&src)) { @@ -446,7 +454,11 @@ ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray & indexes, size_t l return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#else +int ColumnSparse::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#endif { if (const auto * rhs_sparse = typeid_cast(&rhs_)) return values->compareAt(getValueIndex(n), rhs_sparse->getValueIndex(m), rhs_sparse->getValuesColumn(), null_direction_hint); diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 12b2def7cf1..3e34d1de94a 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -81,10 +81,18 @@ public: char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char *) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; +#else + void doInsertFrom(const IColumn & src, size_t n) override; +#endif void insertDefault() override; void insertManyDefaults(size_t length) override; @@ -98,7 +106,11 @@ public: template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#endif void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index a84aea73486..1eda9714d62 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -39,7 +39,11 @@ ColumnString::ColumnString(const ColumnString & src) last_offset, chars.size()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void ColumnString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnString & src_concrete = assert_cast(src); const UInt8 * src_buf = &src_concrete.chars[src_concrete.offsets[position - 1]]; @@ -129,7 +133,11 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { if (length == 0) return; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 39d4684fd89..602ffac65e8 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -142,7 +142,11 @@ public: return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override +#else + void doInsertFrom(const IColumn & src_, size_t n) override +#endif { const ColumnString & src = assert_cast(src_); const size_t size_to_append = src.offsets[n] - src.offsets[n - 1]; /// -1th index is Ok, see PaddedPODArray. @@ -165,7 +169,11 @@ public: } } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertData(const char * pos, size_t length) override { @@ -212,7 +220,11 @@ public: hash.update(reinterpret_cast(chars.data()), chars.size() * sizeof(chars[0])); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; @@ -238,7 +250,11 @@ public: offsets.push_back(offsets.back() + 1); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override +#endif { const ColumnString & rhs = assert_cast(rhs_); return memcmpSmallAllowOverflow15(chars.data() + offsetAt(n), sizeAt(n) - 1, rhs.chars.data() + rhs.offsetAt(m), rhs.sizeAt(m) - 1); diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 2159495b68f..9b822d7f570 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -205,7 +205,11 @@ bool ColumnTuple::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnTuple::insertFrom(const IColumn & src_, size_t n) +#else +void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n) +#endif { const ColumnTuple & src = assert_cast(src_); @@ -218,7 +222,11 @@ void ColumnTuple::insertFrom(const IColumn & src_, size_t n) columns[i]->insertFrom(*src.columns[i], n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else +void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnTuple & src_tuple = assert_cast(src); @@ -318,7 +326,11 @@ void ColumnTuple::updateHashFast(SipHash & hash) const column->updateHashFast(hash); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnTuple::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { column_length += length; const size_t tuple_size = columns.size(); @@ -470,7 +482,11 @@ int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_ return 0; } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else +int ColumnTuple::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { return compareAtImpl(n, m, rhs, nan_direction_hint); } @@ -711,7 +727,13 @@ void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_c ColumnPtr ColumnTuple::compress() const { if (columns.empty()) - return Ptr(); + { + return ColumnCompressed::create(size(), 0, + [n = column_length] + { + return ColumnTuple::create(n); + }); + } size_t byte_size = 0; Columns compressed; diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 0103f81b242..38e479791d4 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -65,8 +65,15 @@ public: void insertData(const char * pos, size_t length) override; void insert(const Field & x) override; bool tryInsert(const Field & x) override; + +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif + void insertDefault() override; void popBack(size_t n) override; StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; @@ -76,14 +83,22 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 0311efd4c83..ec1f8e0a4d5 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -90,7 +90,11 @@ public: return getNestedColumn()->updateHashWithValue(n, hash_func); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif void getExtremes(Field & min, Field & max) const override { column_holder->getExtremes(min, max); } bool valuesHaveFixedSize() const override { return column_holder->valuesHaveFixedSize(); } @@ -488,7 +492,11 @@ const char * ColumnUnique::skipSerializedInArena(const char *) const } template +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnUnique::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else +int ColumnUnique::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { if (is_nullable) { diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index ec47f5dfa74..ee5de4c2dde 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -595,17 +595,29 @@ void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position } } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnVariant::insertFrom(const IColumn & src_, size_t n) +#else +void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n) +#endif { insertFromImpl(src_, n, nullptr); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) +#else +void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) +#endif { insertRangeFromImpl(src_, start, length, nullptr); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#else +void ColumnVariant::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#endif { insertManyFromImpl(src_, position, length, nullptr); } @@ -1174,7 +1186,11 @@ bool ColumnVariant::hasEqualValues() const return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else +int ColumnVariant::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { const auto & rhs_variant = assert_cast(rhs); Discriminator left_discr = globalDiscriminatorAt(n); diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index e5a4498f340..d91b8e93a7d 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -180,9 +180,19 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override; void insertManyFrom(const IColumn & src_, size_t position, size_t length) override; +#else + using IColumn::insertFrom; + using IColumn::insertManyFrom; + using IColumn::insertRangeFrom; + + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) override; + void doInsertManyFrom(const IColumn & src_, size_t position, size_t length) override; +#endif /// Methods for insertion from another Variant but with known mapping between global discriminators. void insertFrom(const IColumn & src_, size_t n, const std::vector & global_discriminators_mapping); @@ -213,7 +223,11 @@ public: ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; ColumnPtr replicate(const Offsets & replicate_offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif bool hasEqualValues() const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 35d9f5386ed..19d1b800961 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -503,7 +503,11 @@ bool ColumnVector::tryInsert(const DB::Field & x) } template +#if !defined(ABORT_ON_LOGICAL_ERROR) void ColumnVector::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else +void ColumnVector::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnVector & src_vec = assert_cast(src); diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index bbd27c91a70..3a0acf5e312 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -64,12 +64,20 @@ public: return data.size(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override +#else + void doInsertFrom(const IColumn & src, size_t n) override +#endif { data.push_back(assert_cast(src).getData()[n]); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override +#else + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override +#endif { ValueType v = assert_cast(src).getData()[position]; data.resize_fill(data.size() + length, v); @@ -142,7 +150,11 @@ public: } /// This method implemented in header because it could be possibly devirtualized. +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override +#else + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override +#endif { return CompareHelper::compare(data[n], assert_cast(rhs_).data[m], nan_direction_hint); } @@ -228,7 +240,11 @@ public: bool tryInsert(const DB::Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 90cccef2b03..552e52cf51c 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -46,7 +46,11 @@ String IColumn::dumpStructure() const return res.str(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void IColumn::insertFrom(const IColumn & src, size_t n) +#else +void IColumn::doInsertFrom(const IColumn & src, size_t n) +#endif { insert(src[n]); } diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index afa301d5c1c..4b6f34e5aa2 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -1,15 +1,14 @@ #pragma once -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include #include "config.h" - class SipHash; class Collator; @@ -180,18 +179,42 @@ public: /// Appends n-th element from other column with the same type. /// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation. +#if !defined(ABORT_ON_LOGICAL_ERROR) virtual void insertFrom(const IColumn & src, size_t n); +#else + void insertFrom(const IColumn & src, size_t n) + { + assertTypeEquality(src); + doInsertFrom(src, n); + } +#endif /// Appends range of elements from other column with the same type. /// Could be used to concatenate columns. +#if !defined(ABORT_ON_LOGICAL_ERROR) virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; +#else + void insertRangeFrom(const IColumn & src, size_t start, size_t length) + { + assertTypeEquality(src); + doInsertRangeFrom(src, start, length); + } +#endif /// Appends one element from other column with the same type multiple times. +#if !defined(ABORT_ON_LOGICAL_ERROR) virtual void insertManyFrom(const IColumn & src, size_t position, size_t length) { for (size_t i = 0; i < length; ++i) insertFrom(src, position); } +#else + void insertManyFrom(const IColumn & src, size_t position, size_t length) + { + assertTypeEquality(src); + doInsertManyFrom(src, position, length); + } +#endif /// Appends one field multiple times. Can be optimized in inherited classes. virtual void insertMany(const Field & field, size_t length) @@ -322,7 +345,15 @@ public: * * For non Nullable and non floating point types, nan_direction_hint is ignored. */ +#if !defined(ABORT_ON_LOGICAL_ERROR) [[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; +#else + [[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const + { + assertTypeEquality(rhs); + return doCompareAt(n, m, rhs, nan_direction_hint); + } +#endif #if USE_EMBEDDED_COMPILER @@ -610,6 +641,8 @@ public: [[nodiscard]] virtual bool isSparse() const { return false; } + [[nodiscard]] virtual bool isConst() const { return false; } + [[nodiscard]] virtual bool isCollationSupported() const { return false; } virtual ~IColumn() = default; @@ -633,6 +666,29 @@ protected: Equals equals, Sort full_sort, PartialSort partial_sort) const; + +#if defined(ABORT_ON_LOGICAL_ERROR) + virtual void doInsertFrom(const IColumn & src, size_t n); + + virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; + + virtual void doInsertManyFrom(const IColumn & src, size_t position, size_t length) + { + for (size_t i = 0; i < length; ++i) + insertFrom(src, position); + } + + virtual int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; + +private: + void assertTypeEquality(const IColumn & rhs) const + { + /// For Sparse and Const columns, we can compare only internal types. It is considered normal to e.g. insert from normal vector column to a sparse vector column. + /// This case is specifically handled in ColumnSparse implementation. Similar situation with Const column. + /// For the rest of column types we can compare the types directly. + chassert((isConst() || isSparse()) ? getDataType() == rhs.getDataType() : typeid(*this) == typeid(rhs)); + } +#endif }; using ColumnPtr = IColumn::Ptr; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index 27f420fbc71..c19fb704d9b 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -26,7 +26,11 @@ public: size_t byteSize() const override { return 0; } size_t byteSizeAt(size_t) const override { return 0; } size_t allocatedBytes() const override { return 0; } +#if !defined(ABORT_ON_LOGICAL_ERROR) int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#else + int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#endif void compareColumn(const IColumn &, size_t, PaddedPODArray *, PaddedPODArray &, int, int) const override { } @@ -67,12 +71,20 @@ public: { } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn &, size_t) override +#else + void doInsertFrom(const IColumn &, size_t) override +#endif { ++s; } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override +#else + void doInsertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override +#endif { s += length; } diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index f71f19a5da6..3398452b7ee 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -85,7 +85,11 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryInsert is not supported for ColumnUnique."); } +#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn &, size_t, size_t) override +#else + void doInsertRangeFrom(const IColumn &, size_t, size_t) override +#endif { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method insertRangeFrom is not supported for ColumnUnique."); } diff --git a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp index 325cf5559cd..645f6ed79f3 100644 --- a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp +++ b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp @@ -52,7 +52,11 @@ static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows) } +#if !defined(ABORT_ON_LOGICAL_ERROR) static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src) +#else +static NO_INLINE void doInsertManyFrom(IColumn & dst, const IColumn & src) +#endif { size_t size = src.size(); dst.insertManyFrom(src, size / 2, size); diff --git a/src/Common/AtomicLogger.h b/src/Common/AtomicLogger.h index 0ece9e8a09a..c1bbdb41866 100644 --- a/src/Common/AtomicLogger.h +++ b/src/Common/AtomicLogger.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include diff --git a/src/Common/BinStringDecodeHelper.h b/src/Common/BinStringDecodeHelper.h index df3e014cfad..03c175fd37f 100644 --- a/src/Common/BinStringDecodeHelper.h +++ b/src/Common/BinStringDecodeHelper.h @@ -5,7 +5,7 @@ namespace DB { -static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size = 2) +static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size) { if ((end - pos) & 1) { @@ -23,7 +23,7 @@ static void inline hexStringDecode(const char * pos, const char * end, char *& o ++out; } -static void inline binStringDecode(const char * pos, const char * end, char *& out) +static void inline binStringDecode(const char * pos, const char * end, char *& out, size_t word_size) { if (pos == end) { @@ -53,7 +53,7 @@ static void inline binStringDecode(const char * pos, const char * end, char *& o ++out; } - assert((end - pos) % 8 == 0); + chassert((end - pos) % word_size == 0); while (end - pos != 0) { diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 8a4792f0a5a..d36c7fd08aa 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -12,7 +12,9 @@ #include #include +#include #include +#include #include #include "config.h" @@ -22,24 +24,169 @@ #define STRINGIFY(x) STRINGIFY_HELPER(x) #endif +using namespace DB; namespace DB { namespace ErrorCodes { - extern const int CANNOT_CLOSE_FILE; - extern const int CANNOT_OPEN_FILE; - extern const int FILE_DOESNT_EXIST; - extern const int INCORRECT_DATA; +extern const int FILE_DOESNT_EXIST; +extern const int INCORRECT_DATA; } -CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) - : log(getLogger("CgroupsMemoryUsageObserver")) - , wait_time(wait_time_) - , memory_usage_file(log) +} + +namespace { - LOG_INFO(log, "Initialized cgroups memory limit observer, wait time is {} sec", wait_time.count()); + +/// Format is +/// kernel 5 +/// rss 15 +/// [...] +uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key) +{ + while (!buf.eof()) + { + std::string current_key; + readStringUntilWhitespace(current_key, buf); + if (current_key != key) + { + std::string dummy; + readStringUntilNewlineInto(dummy, buf); + buf.ignore(); + continue; + } + + assertChar(' ', buf); + uint64_t value = 0; + readIntText(value, buf); + return value; + } + + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName()); +} + +struct CgroupsV1Reader : ICgroupsReader +{ + explicit CgroupsV1Reader(const std::filesystem::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } + + uint64_t readMemoryUsage() override + { + std::lock_guard lock(mutex); + buf.rewind(); + return readMetricFromStatFile(buf, "rss"); + } + +private: + std::mutex mutex; + ReadBufferFromFile buf TSA_GUARDED_BY(mutex); +}; + +struct CgroupsV2Reader : ICgroupsReader +{ + explicit CgroupsV2Reader(const std::filesystem::path & stat_file_dir) + : current_buf(stat_file_dir / "memory.current"), stat_buf(stat_file_dir / "memory.stat") + { + } + + uint64_t readMemoryUsage() override + { + std::lock_guard lock(mutex); + current_buf.rewind(); + stat_buf.rewind(); + + int64_t mem_usage = 0; + /// memory.current contains a single number + /// the reason why we subtract it described here: https://github.com/ClickHouse/ClickHouse/issues/64652#issuecomment-2149630667 + readIntText(mem_usage, current_buf); + mem_usage -= readMetricFromStatFile(stat_buf, "inactive_file"); + chassert(mem_usage >= 0, "Negative memory usage"); + return mem_usage; + } + +private: + std::mutex mutex; + ReadBufferFromFile current_buf TSA_GUARDED_BY(mutex); + ReadBufferFromFile stat_buf TSA_GUARDED_BY(mutex); +}; + +/// Caveats: +/// - All of the logic in this file assumes that the current process is the only process in the +/// containing cgroup (or more precisely: the only process with significant memory consumption). +/// If this is not the case, then other processe's memory consumption may affect the internal +/// memory tracker ... +/// - Cgroups v1 and v2 allow nested cgroup hierarchies. As v1 is deprecated for over half a +/// decade and will go away at some point, hierarchical detection is only implemented for v2. +/// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such +/// systems existed only for a short transition period. + +std::optional getCgroupsV2Path() +{ + if (!cgroupsV2Enabled()) + return {}; + + if (!cgroupsV2MemoryControllerEnabled()) + return {}; + + String cgroup = cgroupV2OfProcess(); + auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup); + + /// Return the bottom-most nested current memory file. If there is no such file at the current + /// level, try again at the parent level as memory settings are inherited. + while (current_cgroup != default_cgroups_mount.parent_path()) + { + const auto current_path = current_cgroup / "memory.current"; + const auto stat_path = current_cgroup / "memory.stat"; + if (std::filesystem::exists(current_path) && std::filesystem::exists(stat_path)) + return {current_cgroup}; + current_cgroup = current_cgroup.parent_path(); + } + return {}; +} + +std::optional getCgroupsV1Path() +{ + auto path = default_cgroups_mount / "memory/memory.stat"; + if (!std::filesystem::exists(path)) + return {}; + return {default_cgroups_mount / "memory"}; +} + +std::pair getCgroupsPath() +{ + auto v2_path = getCgroupsV2Path(); + if (v2_path.has_value()) + return {*v2_path, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; + + auto v1_path = getCgroupsV1Path(); + if (v1_path.has_value()) + return {*v1_path, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; + + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file"); +} + +} + +namespace DB +{ + +CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) + : log(getLogger("CgroupsMemoryUsageObserver")), wait_time(wait_time_) +{ + const auto [cgroup_path, version] = getCgroupsPath(); + + if (version == CgroupsVersion::V2) + cgroup_reader = std::make_unique(cgroup_path); + else + cgroup_reader = std::make_unique(cgroup_path); + + LOG_INFO( + log, + "Will read the current memory usage from '{}' (cgroups version: {}), wait time is {} sec", + cgroup_path, + (version == CgroupsVersion::V1) ? "v1" : "v2", + wait_time.count()); } CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver() @@ -79,12 +226,13 @@ void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint { LOG_WARNING(log, "Exceeded soft memory limit ({})", ReadableSize(soft_limit_)); -#if USE_JEMALLOC +# if USE_JEMALLOC LOG_INFO(log, "Purging jemalloc arenas"); mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); -#endif +# endif /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them. - uint64_t memory_usage = memory_usage_file.readMemoryUsage(); + uint64_t memory_usage = cgroup_reader->readMemoryUsage(); + LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); MemoryTracker::setRSS(memory_usage, 0); LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage)); @@ -104,152 +252,6 @@ void CgroupsMemoryUsageObserver::setOnMemoryAmountAvailableChangedFn(OnMemoryAmo on_memory_amount_available_changed = on_memory_amount_available_changed_; } -namespace -{ - -/// Caveats: -/// - All of the logic in this file assumes that the current process is the only process in the -/// containing cgroup (or more precisely: the only process with significant memory consumption). -/// If this is not the case, then other processe's memory consumption may affect the internal -/// memory tracker ... -/// - Cgroups v1 and v2 allow nested cgroup hierarchies. As v1 is deprecated for over half a -/// decade and will go away at some point, hierarchical detection is only implemented for v2. -/// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such -/// systems existed only for a short transition period. - -std::optional getCgroupsV2FileName() -{ - if (!cgroupsV2Enabled()) - return {}; - - if (!cgroupsV2MemoryControllerEnabled()) - return {}; - - String cgroup = cgroupV2OfProcess(); - auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup); - - /// Return the bottom-most nested current memory file. If there is no such file at the current - /// level, try again at the parent level as memory settings are inherited. - while (current_cgroup != default_cgroups_mount.parent_path()) - { - auto path = current_cgroup / "memory.current"; - if (std::filesystem::exists(path)) - return {path}; - current_cgroup = current_cgroup.parent_path(); - } - return {}; -} - -std::optional getCgroupsV1FileName() -{ - auto path = default_cgroups_mount / "memory/memory.stat"; - if (!std::filesystem::exists(path)) - return {}; - return {path}; -} - -std::pair getCgroupsFileName() -{ - auto v2_file_name = getCgroupsV2FileName(); - if (v2_file_name.has_value()) - return {*v2_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; - - auto v1_file_name = getCgroupsV1FileName(); - if (v1_file_name.has_value()) - return {*v1_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; - - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file"); -} - -} - -CgroupsMemoryUsageObserver::MemoryUsageFile::MemoryUsageFile(LoggerPtr log_) - : log(log_) -{ - std::tie(file_name, version) = getCgroupsFileName(); - - LOG_INFO(log, "Will read the current memory usage from '{}' (cgroups version: {})", file_name, (version == CgroupsVersion::V1) ? "v1" : "v2"); - - fd = ::open(file_name.data(), O_RDONLY); - if (fd == -1) - ErrnoException::throwFromPath( - (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, - file_name, "Cannot open file '{}'", file_name); -} - -CgroupsMemoryUsageObserver::MemoryUsageFile::~MemoryUsageFile() -{ - assert(fd != -1); - if (::close(fd) != 0) - { - try - { - ErrnoException::throwFromPath( - ErrorCodes::CANNOT_CLOSE_FILE, - file_name, "Cannot close file '{}'", file_name); - } - catch (const ErrnoException &) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); - } - } -} - -uint64_t CgroupsMemoryUsageObserver::MemoryUsageFile::readMemoryUsage() const -{ - /// File read is probably not read is thread-safe, just to be sure - std::lock_guard lock(mutex); - - ReadBufferFromFileDescriptor buf(fd); - buf.rewind(); - - uint64_t mem_usage = 0; - - switch (version) - { - case CgroupsVersion::V1: - { - /// Format is - /// kernel 5 - /// rss 15 - /// [...] - std::string key; - bool found_rss = false; - - while (!buf.eof()) - { - readStringUntilWhitespace(key, buf); - if (key != "rss") - { - std::string dummy; - readStringUntilNewlineInto(dummy, buf); - buf.ignore(); - continue; - } - - assertChar(' ', buf); - readIntText(mem_usage, buf); - found_rss = true; - break; - } - - if (!found_rss) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find 'rss' in '{}'", file_name); - - break; - } - case CgroupsVersion::V2: - { - readIntText(mem_usage, buf); - break; - } - } - - LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(mem_usage)); - - return mem_usage; -} - void CgroupsMemoryUsageObserver::startThread() { if (!thread.joinable()) @@ -301,7 +303,8 @@ void CgroupsMemoryUsageObserver::runThread() std::lock_guard limit_lock(limit_mutex); if (soft_limit > 0 && hard_limit > 0) { - uint64_t memory_usage = memory_usage_file.readMemoryUsage(); + uint64_t memory_usage = cgroup_reader->readMemoryUsage(); + LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); if (memory_usage > hard_limit) { if (last_memory_usage <= hard_limit) diff --git a/src/Common/CgroupsMemoryUsageObserver.h b/src/Common/CgroupsMemoryUsageObserver.h index edc1cee750a..b848a2bff3c 100644 --- a/src/Common/CgroupsMemoryUsageObserver.h +++ b/src/Common/CgroupsMemoryUsageObserver.h @@ -3,11 +3,19 @@ #include #include +#include #include namespace DB { +struct ICgroupsReader +{ + virtual ~ICgroupsReader() = default; + + virtual uint64_t readMemoryUsage() = 0; +}; + /// Does two things: /// 1. Periodically reads the memory usage of the process from Linux cgroups. /// You can specify soft or hard memory limits: @@ -61,27 +69,12 @@ private: uint64_t last_memory_usage = 0; /// how much memory does the process use uint64_t last_available_memory_amount; /// how much memory can the process use - /// Represents the cgroup virtual file that shows the memory consumption of the process's cgroup. - struct MemoryUsageFile - { - public: - explicit MemoryUsageFile(LoggerPtr log_); - ~MemoryUsageFile(); - uint64_t readMemoryUsage() const; - private: - LoggerPtr log; - mutable std::mutex mutex; - int fd TSA_GUARDED_BY(mutex) = -1; - CgroupsVersion version; - std::string file_name; - }; - - MemoryUsageFile memory_usage_file; - void stopThread(); void runThread(); + std::unique_ptr cgroup_reader; + std::mutex thread_mutex; std::condition_variable cond; ThreadFromGlobalPool thread; diff --git a/src/Common/ConcurrencyControl.h b/src/Common/ConcurrencyControl.h index ba94502962c..9d35d7cb8b0 100644 --- a/src/Common/ConcurrencyControl.h +++ b/src/Common/ConcurrencyControl.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h index 922607da813..16b9488c98d 100644 --- a/src/Common/ConcurrentBoundedQueue.h +++ b/src/Common/ConcurrentBoundedQueue.h @@ -1,8 +1,6 @@ #pragma once #include -#include -#include #include #include #include @@ -200,22 +198,18 @@ public: */ bool finish() { - bool was_finished_before = false; - { std::lock_guard lock(queue_mutex); if (is_finished) return true; - was_finished_before = is_finished; is_finished = true; } pop_condition.notify_all(); push_condition.notify_all(); - - return was_finished_before; + return false; } /// Returns if queue is finished diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index c9832e8efd5..67d6036aa51 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -316,7 +316,6 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, } else if (replace) { - with_element.removeAttribute("replace"); NodePtr new_node = config->importNode(with_node, true); config_root->replaceChild(new_node, config_node); } diff --git a/src/Common/Coverage.cpp b/src/Common/Coverage.cpp new file mode 100644 index 00000000000..a21efe62fb6 --- /dev/null +++ b/src/Common/Coverage.cpp @@ -0,0 +1,65 @@ +#include + +#if defined(SANITIZE_COVERAGE) + +#include +#include + +#include +#include + +#include +#include + +#include + +/// Macros to avoid using strlen(), since it may fail if SSE is not supported. +#define writeError(data) do \ + { \ + static_assert(__builtin_constant_p(data)); \ + if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ + _Exit(1); \ + } while (false) + +__attribute__((no_sanitize("coverage"))) +void dumpCoverage() +{ + /// A user can request to dump the coverage information into files at exit. + /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, + /// that cannot introspect it with SQL functions at runtime. + + /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' + /// containing the list of addresses of covered . + + /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. + + if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) + { + auto dump = [](const std::string & name, auto span) + { + /// Write only non-zeros. + std::vector data; + data.reserve(span.size()); + for (auto addr : span) + if (addr) + data.push_back(addr); + + int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); + if (-1 == fd) + { + writeError("Cannot open a file to write the coverage data\n"); + } + else + { + if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) + writeError("Cannot write the coverage data to a file\n"); + if (0 != ::close(fd)) + writeError("Cannot close the file with coverage data\n"); + } + }; + + dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); + } +} +#endif + diff --git a/src/Common/Coverage.h b/src/Common/Coverage.h new file mode 100644 index 00000000000..aa6dd2825ed --- /dev/null +++ b/src/Common/Coverage.h @@ -0,0 +1,5 @@ +#pragma once + +#if defined(SANITIZE_COVERAGE) +void dumpCoverage(); +#endif diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 8516a88c7af..7c97e73f278 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -267,7 +267,7 @@ M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \ M(S3Requests, "S3 requests count") \ M(KeeperAliveConnections, "Number of alive connections") \ - M(KeeperOutstandingRequets, "Number of outstanding requests") \ + M(KeeperOutstandingRequests, "Number of outstanding requests") \ M(ThreadsInOvercommitTracker, "Number of waiting threads inside of OvercommitTracker") \ M(IOUringPendingEvents, "Number of io_uring SQEs waiting to be submitted") \ M(IOUringInFlightEvents, "Number of io_uring SQEs in flight") \ diff --git a/src/Common/DateLUT.cpp b/src/Common/DateLUT.cpp index 3a20fb1a125..f9bbe2fbf40 100644 --- a/src/Common/DateLUT.cpp +++ b/src/Common/DateLUT.cpp @@ -2,7 +2,9 @@ #include #include +#include #include +#include #include #include diff --git a/src/Common/EnvironmentChecks.cpp b/src/Common/EnvironmentChecks.cpp new file mode 100644 index 00000000000..d69e8cbaa3d --- /dev/null +++ b/src/Common/EnvironmentChecks.cpp @@ -0,0 +1,234 @@ +#include +#include + +#include + +#include +#include +#include + +#include + +#include + +namespace +{ + +enum class InstructionFail : uint8_t +{ + NONE = 0, + SSE3 = 1, + SSSE3 = 2, + SSE4_1 = 3, + SSE4_2 = 4, + POPCNT = 5, + AVX = 6, + AVX2 = 7, + AVX512 = 8 +}; + +auto instructionFailToString(InstructionFail fail) +{ + switch (fail) + { +#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) + case InstructionFail::NONE: + ret("NONE"); + case InstructionFail::SSE3: + ret("SSE3"); + case InstructionFail::SSSE3: + ret("SSSE3"); + case InstructionFail::SSE4_1: + ret("SSE4.1"); + case InstructionFail::SSE4_2: + ret("SSE4.2"); + case InstructionFail::POPCNT: + ret("POPCNT"); + case InstructionFail::AVX: + ret("AVX"); + case InstructionFail::AVX2: + ret("AVX2"); + case InstructionFail::AVX512: + ret("AVX512"); +#undef ret + } +} + + +sigjmp_buf jmpbuf; + +[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) +{ + siglongjmp(jmpbuf, 1); +} + +/// Check if necessary SSE extensions are available by trying to execute some sse instructions. +/// If instruction is unavailable, SIGILL will be sent by kernel. +void checkRequiredInstructionsImpl(volatile InstructionFail & fail) +{ +#if defined(__SSE3__) + fail = InstructionFail::SSE3; + __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSSE3__) + fail = InstructionFail::SSSE3; + __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); + +#endif + +#if defined(__SSE4_1__) + fail = InstructionFail::SSE4_1; + __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSE4_2__) + fail = InstructionFail::SSE4_2; + __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); +#endif + + /// Defined by -msse4.2 +#if defined(__POPCNT__) + fail = InstructionFail::POPCNT; + { + uint64_t a = 0; + uint64_t b = 0; + __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); + } +#endif + +#if defined(__AVX__) + fail = InstructionFail::AVX; + __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX2__) + fail = InstructionFail::AVX2; + __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX512__) + fail = InstructionFail::AVX512; + __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); +#endif + + fail = InstructionFail::NONE; +} + +/// Macros to avoid using strlen(), since it may fail if SSE is not supported. +#define writeError(data) do \ + { \ + static_assert(__builtin_constant_p(data)); \ + if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ + _Exit(1); \ + } while (false) + +/// Check SSE and others instructions availability. Calls exit on fail. +/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. +void checkRequiredInstructions() +{ + struct sigaction sa{}; + struct sigaction sa_old{}; + sa.sa_sigaction = sigIllCheckHandler; + sa.sa_flags = SA_SIGINFO; + auto signal = SIGILL; + if (sigemptyset(&sa.sa_mask) != 0 + || sigaddset(&sa.sa_mask, signal) != 0 + || sigaction(signal, &sa, &sa_old) != 0) + { + /// You may wonder about strlen. + /// Typical implementation of strlen is using SSE4.2 or AVX2. + /// But this is not the case because it's compiler builtin and is executed at compile time. + + writeError("Can not set signal handler\n"); + _Exit(1); + } + + volatile InstructionFail fail = InstructionFail::NONE; + + if (sigsetjmp(jmpbuf, 1)) + { + writeError("Instruction check fail. The CPU does not support "); + if (!std::apply(writeRetry, instructionFailToString(fail))) + _Exit(1); + writeError(" instruction set.\n"); + _Exit(1); + } + + checkRequiredInstructionsImpl(fail); + + if (sigaction(signal, &sa_old, nullptr)) + { + writeError("Can not set signal handler\n"); + _Exit(1); + } +} + +struct Checker +{ + Checker() + { + checkRequiredInstructions(); + } +} checker +#ifndef OS_DARWIN + __attribute__((init_priority(101))) /// Run before other static initializers. +#endif +; + +} + + +#if !defined(USE_MUSL) +/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. +void checkHarmfulEnvironmentVariables(char ** argv) +{ + std::initializer_list harmful_env_variables = { + /// The list is a selection from "man ld-linux". + "LD_PRELOAD", + "LD_LIBRARY_PATH", + "LD_ORIGIN_PATH", + "LD_AUDIT", + "LD_DYNAMIC_WEAK", + /// The list is a selection from "man dyld" (osx). + "DYLD_LIBRARY_PATH", + "DYLD_FALLBACK_LIBRARY_PATH", + "DYLD_VERSIONED_LIBRARY_PATH", + "DYLD_INSERT_LIBRARIES", + }; + + bool require_reexec = false; + for (const auto * var : harmful_env_variables) + { + if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) + { + /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful + if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently + { + fmt::print(stderr, "Cannot override {} environment variable", var); + _exit(1); + } + require_reexec = true; + } + } + + if (require_reexec) + { + /// Use execvp() over execv() to search in PATH. + /// + /// This should be safe, since: + /// - if argv[0] is relative path - it is OK + /// - if argv[0] has only basename, the it will search in PATH, like shell will do. + /// + /// Also note, that this (search in PATH) because there is no easy and + /// portable way to get absolute path of argv[0]. + /// - on linux there is /proc/self/exec and AT_EXECFN + /// - but on other OSes there is no such thing (especially on OSX). + /// + /// And since static linking will be done someday anyway, + /// let's not pollute the code base with special cases. + int error = execvp(argv[0], argv); + _exit(error); + } +} +#endif diff --git a/src/Common/EnvironmentChecks.h b/src/Common/EnvironmentChecks.h new file mode 100644 index 00000000000..6d355a69ff9 --- /dev/null +++ b/src/Common/EnvironmentChecks.h @@ -0,0 +1,5 @@ +#pragma once + +#if !defined(USE_MUSL) +void checkHarmfulEnvironmentVariables(char ** argv); +#endif diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 1f4b0aea8f2..111280074dd 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -38,10 +38,19 @@ namespace ErrorCodes extern const int CANNOT_MREMAP; } +void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size) +{ + auto & logger = Poco::Logger::root(); + LOG_FATAL(&logger, "Logical error: '{}'.", description); + if (trace) + LOG_FATAL(&logger, "Stack trace (when copying this message, always include the lines below):\n\n{}", StackTrace::toString(trace, trace_offset, trace_size)); + abort(); +} + void abortOnFailedAssertion(const String & description) { - LOG_FATAL(&Poco::Logger::root(), "Logical error: '{}'.", description); - abort(); + StackTrace st; + abortOnFailedAssertion(description, st.getFramePointers().data(), st.getOffset(), st.getSize()); } bool terminate_on_any_exception = false; @@ -58,7 +67,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc #ifdef ABORT_ON_LOGICAL_ERROR if (code == ErrorCodes::LOGICAL_ERROR) { - abortOnFailedAssertion(msg); + abortOnFailedAssertion(msg, trace.data(), 0, trace.size()); } #endif diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 87ef7101cdc..a4774a89f6a 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -25,8 +25,6 @@ namespace DB class AtomicLogger; -[[noreturn]] void abortOnFailedAssertion(const String & description); - /// This flag can be set for testing purposes - to check that no exceptions are thrown. extern bool terminate_on_any_exception; @@ -167,6 +165,8 @@ protected: mutable std::vector capture_thread_frame_pointers; }; +[[noreturn]] void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size); +[[noreturn]] void abortOnFailedAssertion(const String & description); std::string getExceptionStackTraceString(const std::exception & e); std::string getExceptionStackTraceString(std::exception_ptr e); diff --git a/src/Common/FieldBinaryEncoding.cpp b/src/Common/FieldBinaryEncoding.cpp new file mode 100644 index 00000000000..6c1a8496fe6 --- /dev/null +++ b/src/Common/FieldBinaryEncoding.cpp @@ -0,0 +1,389 @@ +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int INCORRECT_DATA; +} + +namespace +{ + +enum class FieldBinaryTypeIndex: uint8_t +{ + Null = 0x00, + UInt64 = 0x01, + Int64 = 0x02, + UInt128 = 0x03, + Int128 = 0x04, + UInt256 = 0x05, + Int256 = 0x06, + Float64 = 0x07, + Decimal32 = 0x08, + Decimal64 = 0x09, + Decimal128 = 0x0A, + Decimal256 = 0x0B, + String = 0x0C, + Array = 0x0D, + Tuple = 0x0E, + Map = 0x0F, + IPv4 = 0x10, + IPv6 = 0x11, + UUID = 0x12, + Bool = 0x13, + Object = 0x14, + AggregateFunctionState = 0x15, + + NegativeInfinity = 0xFE, + PositiveInfinity = 0xFF, +}; + +class FieldVisitorEncodeBinary +{ +public: + void operator() (const Null & x, WriteBuffer & buf) const; + void operator() (const UInt64 & x, WriteBuffer & buf) const; + void operator() (const UInt128 & x, WriteBuffer & buf) const; + void operator() (const UInt256 & x, WriteBuffer & buf) const; + void operator() (const Int64 & x, WriteBuffer & buf) const; + void operator() (const Int128 & x, WriteBuffer & buf) const; + void operator() (const Int256 & x, WriteBuffer & buf) const; + void operator() (const UUID & x, WriteBuffer & buf) const; + void operator() (const IPv4 & x, WriteBuffer & buf) const; + void operator() (const IPv6 & x, WriteBuffer & buf) const; + void operator() (const Float64 & x, WriteBuffer & buf) const; + void operator() (const String & x, WriteBuffer & buf) const; + void operator() (const Array & x, WriteBuffer & buf) const; + void operator() (const Tuple & x, WriteBuffer & buf) const; + void operator() (const Map & x, WriteBuffer & buf) const; + void operator() (const Object & x, WriteBuffer & buf) const; + void operator() (const DecimalField & x, WriteBuffer & buf) const; + void operator() (const DecimalField & x, WriteBuffer & buf) const; + void operator() (const DecimalField & x, WriteBuffer & buf) const; + void operator() (const DecimalField & x, WriteBuffer & buf) const; + void operator() (const AggregateFunctionStateData & x, WriteBuffer & buf) const; + [[noreturn]] void operator() (const CustomType & x, WriteBuffer & buf) const; + void operator() (const bool & x, WriteBuffer & buf) const; +}; + +void FieldVisitorEncodeBinary::operator() (const Null & x, WriteBuffer & buf) const +{ + if (x.isNull()) + writeBinary(UInt8(FieldBinaryTypeIndex::Null), buf); + else if (x.isPositiveInfinity()) + writeBinary(UInt8(FieldBinaryTypeIndex::PositiveInfinity), buf); + else if (x.isNegativeInfinity()) + writeBinary(UInt8(FieldBinaryTypeIndex::NegativeInfinity), buf); +} + +void FieldVisitorEncodeBinary::operator() (const UInt64 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::UInt64), buf); + writeVarUInt(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Int64 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Int64), buf); + writeVarInt(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Float64 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Float64), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const String & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::String), buf); + writeStringBinary(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const UInt128 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::UInt128), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Int128 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Int128), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const UInt256 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::UInt256), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Int256 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Int256), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const UUID & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::UUID), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const IPv4 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::IPv4), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const IPv6 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::IPv6), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const DecimalField & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Decimal32), buf); + writeVarUInt(x.getScale(), buf); + writeBinaryLittleEndian(x.getValue(), buf); +} + +void FieldVisitorEncodeBinary::operator() (const DecimalField & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Decimal64), buf); + writeVarUInt(x.getScale(), buf); + writeBinaryLittleEndian(x.getValue(), buf); +} + +void FieldVisitorEncodeBinary::operator() (const DecimalField & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Decimal128), buf); + writeVarUInt(x.getScale(), buf); + writeBinaryLittleEndian(x.getValue(), buf); +} + +void FieldVisitorEncodeBinary::operator() (const DecimalField & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Decimal256), buf); + writeVarUInt(x.getScale(), buf); + writeBinaryLittleEndian(x.getValue(), buf); +} + +void FieldVisitorEncodeBinary::operator() (const AggregateFunctionStateData & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::AggregateFunctionState), buf); + writeStringBinary(x.name, buf); + writeStringBinary(x.data, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Array & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Array), buf); + size_t size = x.size(); + writeVarUInt(size, buf); + for (size_t i = 0; i < size; ++i) + Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, x[i]); +} + +void FieldVisitorEncodeBinary::operator() (const Tuple & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Tuple), buf); + size_t size = x.size(); + writeVarUInt(size, buf); + for (size_t i = 0; i < size; ++i) + Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, x[i]); +} + +void FieldVisitorEncodeBinary::operator() (const Map & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Map), buf); + size_t size = x.size(); + writeVarUInt(size, buf); + for (size_t i = 0; i < size; ++i) + { + const Tuple & key_and_value = x[i].get(); + Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, key_and_value[0]); + Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, key_and_value[1]); + } +} + +void FieldVisitorEncodeBinary::operator() (const Object & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Object), buf); + + size_t size = x.size(); + writeVarUInt(size, buf); + for (const auto & [key, value] : x) + { + writeStringBinary(key, buf); + Field::dispatch([&buf] (const auto & val) { FieldVisitorEncodeBinary()(val, buf); }, value); + } +} + +void FieldVisitorEncodeBinary::operator()(const bool & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Bool), buf); + writeBinary(static_cast(x), buf); +} + +[[noreturn]] void FieldVisitorEncodeBinary::operator()(const CustomType &, WriteBuffer &) const +{ + /// TODO: Support binary encoding/decoding for custom types somehow. + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Binary encoding of Field with custom type is not supported"); +} + +template +Field decodeBigInteger(ReadBuffer & buf) +{ + T value; + readBinaryLittleEndian(value, buf); + return value; +} + +template +DecimalField decodeDecimal(ReadBuffer & buf) +{ + UInt32 scale; + readVarUInt(scale, buf); + T value; + readBinaryLittleEndian(value, buf); + return DecimalField(value, scale); +} + +template +T decodeValueLittleEndian(ReadBuffer & buf) +{ + T value; + readBinaryLittleEndian(value, buf); + return value; +} + +template +T decodeArrayLikeField(ReadBuffer & buf) +{ + size_t size; + readVarUInt(size, buf); + T value; + for (size_t i = 0; i != size; ++i) + value.push_back(decodeField(buf)); + return value; +} + +} +void encodeField(const Field & x, WriteBuffer & buf) +{ + Field::dispatch([&buf] (const auto & val) { FieldVisitorEncodeBinary()(val, buf); }, x); +} + +Field decodeField(ReadBuffer & buf) +{ + UInt8 type; + readBinary(type, buf); + switch (FieldBinaryTypeIndex(type)) + { + case FieldBinaryTypeIndex::Null: + return Null(); + case FieldBinaryTypeIndex::PositiveInfinity: + return POSITIVE_INFINITY; + case FieldBinaryTypeIndex::NegativeInfinity: + return NEGATIVE_INFINITY; + case FieldBinaryTypeIndex::Int64: + { + Int64 value; + readVarInt(value, buf); + return value; + } + case FieldBinaryTypeIndex::UInt64: + { + UInt64 value; + readVarUInt(value, buf); + return value; + } + case FieldBinaryTypeIndex::Int128: + return decodeBigInteger(buf); + case FieldBinaryTypeIndex::UInt128: + return decodeBigInteger(buf); + case FieldBinaryTypeIndex::Int256: + return decodeBigInteger(buf); + case FieldBinaryTypeIndex::UInt256: + return decodeBigInteger(buf); + case FieldBinaryTypeIndex::Float64: + return decodeValueLittleEndian(buf); + case FieldBinaryTypeIndex::Decimal32: + return decodeDecimal(buf); + case FieldBinaryTypeIndex::Decimal64: + return decodeDecimal(buf); + case FieldBinaryTypeIndex::Decimal128: + return decodeDecimal(buf); + case FieldBinaryTypeIndex::Decimal256: + return decodeDecimal(buf); + case FieldBinaryTypeIndex::String: + { + String value; + readStringBinary(value, buf); + return value; + } + case FieldBinaryTypeIndex::UUID: + return decodeValueLittleEndian(buf); + case FieldBinaryTypeIndex::IPv4: + return decodeValueLittleEndian(buf); + case FieldBinaryTypeIndex::IPv6: + return decodeValueLittleEndian(buf); + case FieldBinaryTypeIndex::Bool: + { + bool value; + readBinary(value, buf); + return value; + } + case FieldBinaryTypeIndex::Array: + return decodeArrayLikeField(buf); + case FieldBinaryTypeIndex::Tuple: + return decodeArrayLikeField(buf); + case FieldBinaryTypeIndex::Map: + { + size_t size; + readVarUInt(size, buf); + Map map; + for (size_t i = 0; i != size; ++i) + { + Tuple key_and_value; + key_and_value.push_back(decodeField(buf)); + key_and_value.push_back(decodeField(buf)); + map.push_back(key_and_value); + } + return map; + } + case FieldBinaryTypeIndex::Object: + { + size_t size; + readVarUInt(size, buf); + Object value; + for (size_t i = 0; i != size; ++i) + { + String name; + readStringBinary(name, buf); + value[name] = decodeField(buf); + } + return value; + } + case FieldBinaryTypeIndex::AggregateFunctionState: + { + String name; + readStringBinary(name, buf); + String data; + readStringBinary(data, buf); + return AggregateFunctionStateData{.name = name, .data = data}; + } + } + + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown Field type: {0:#04x}", UInt64(type)); +} + +} diff --git a/src/Common/FieldBinaryEncoding.h b/src/Common/FieldBinaryEncoding.h new file mode 100644 index 00000000000..aa6694cb03e --- /dev/null +++ b/src/Common/FieldBinaryEncoding.h @@ -0,0 +1,43 @@ +#pragma once + +#include + +namespace DB +{ + +/** +Binary encoding for Fields: +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| Field type | Binary encoding | +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| `Null` | `0x00` | +| `UInt64` | `0x01` | +| `Int64` | `0x02` | +| `UInt128` | `0x03` | +| `Int128` | `0x04` | +| `UInt128` | `0x05` | +| `Int128` | `0x06` | +| `Float64` | `0x07` | +| `Decimal32` | `0x08` | +| `Decimal64` | `0x09` | +| `Decimal128` | `0x0A` | +| `Decimal256` | `0x0B` | +| `String` | `0x0C` | +| `Array` | `0x0D...` | +| `Tuple` | `0x0E...` | +| `Map` | `0x0F...` | +| `IPv4` | `0x10` | +| `IPv6` | `0x11` | +| `UUID` | `0x12` | +| `Bool` | `0x13` | +| `Object` | `0x14...` | +| `AggregateFunctionState` | `0x15` | +| `Negative infinity` | `0xFE` | +| `Positive infinity` | `0xFF` | +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +*/ + +void encodeField(const Field &, WriteBuffer & buf); +Field decodeField(ReadBuffer & buf); + +} diff --git a/src/Common/GWPAsan.cpp b/src/Common/GWPAsan.cpp index 488f8e2c5dc..48fbd07ec34 100644 --- a/src/Common/GWPAsan.cpp +++ b/src/Common/GWPAsan.cpp @@ -57,9 +57,12 @@ static bool guarded_alloc_initialized = [] opts.MaxSimultaneousAllocations = 1024; if (!env_options_raw || !std::string_view{env_options_raw}.contains("SampleRate")) - opts.SampleRate = 50000; + opts.SampleRate = 10000; + + const char * collect_stacktraces = std::getenv("GWP_ASAN_COLLECT_STACKTRACES"); // NOLINT(concurrency-mt-unsafe) + if (collect_stacktraces && std::string_view{collect_stacktraces} == "1") + opts.Backtrace = getBackTrace; - opts.Backtrace = getBackTrace; GuardedAlloc.init(opts); return true; diff --git a/src/Common/GetPriorityForLoadBalancing.cpp b/src/Common/GetPriorityForLoadBalancing.cpp index dc5704ef6b5..77b7867b45d 100644 --- a/src/Common/GetPriorityForLoadBalancing.cpp +++ b/src/Common/GetPriorityForLoadBalancing.cpp @@ -1,3 +1,4 @@ +#include #include #include diff --git a/src/Common/GetPriorityForLoadBalancing.h b/src/Common/GetPriorityForLoadBalancing.h index 01dae9a1289..d4c68803866 100644 --- a/src/Common/GetPriorityForLoadBalancing.h +++ b/src/Common/GetPriorityForLoadBalancing.h @@ -1,6 +1,10 @@ #pragma once -#include +#include +#include + +#include +#include namespace DB { diff --git a/src/Common/IntervalKind.h b/src/Common/IntervalKind.h index f8e1fe87276..d66ea6018d4 100644 --- a/src/Common/IntervalKind.h +++ b/src/Common/IntervalKind.h @@ -7,19 +7,20 @@ namespace DB /// Kind of a temporal interval. struct IntervalKind { + /// note: The order and numbers are important and used in binary encoding, append new interval kinds to the end of list. enum class Kind : uint8_t { - Nanosecond, - Microsecond, - Millisecond, - Second, - Minute, - Hour, - Day, - Week, - Month, - Quarter, - Year, + Nanosecond = 0x00, + Microsecond = 0x01, + Millisecond = 0x02, + Second = 0x03, + Minute = 0x04, + Hour = 0x05, + Day = 0x06, + Week = 0x07, + Month = 0x08, + Quarter = 0x09, + Year = 0x0A, }; Kind kind = Kind::Second; diff --git a/src/Common/JSONParsers/SimdJSONParser.h b/src/Common/JSONParsers/SimdJSONParser.h index 827d142266a..db679b14f52 100644 --- a/src/Common/JSONParsers/SimdJSONParser.h +++ b/src/Common/JSONParsers/SimdJSONParser.h @@ -14,6 +14,7 @@ namespace DB { + namespace ErrorCodes { extern const int CANNOT_ALLOCATE_MEMORY; diff --git a/src/Common/Jemalloc.cpp b/src/Common/Jemalloc.cpp index fbe2f62c944..d7cc246db6a 100644 --- a/src/Common/Jemalloc.cpp +++ b/src/Common/Jemalloc.cpp @@ -46,6 +46,20 @@ void checkJemallocProfilingEnabled() "set: MALLOC_CONF=background_thread:true,prof:true"); } +template +void setJemallocValue(const char * name, T value) +{ + T old_value; + size_t old_value_size = sizeof(T); + if (mallctl(name, &old_value, &old_value_size, reinterpret_cast(&value), sizeof(T))) + { + LOG_WARNING(getLogger("Jemalloc"), "mallctl for {} failed", name); + return; + } + + LOG_INFO(getLogger("Jemalloc"), "Value for {} set to {} (from {})", name, value, old_value); +} + void setJemallocProfileActive(bool value) { checkJemallocProfilingEnabled(); @@ -58,7 +72,7 @@ void setJemallocProfileActive(bool value) return; } - mallctl("prof.active", nullptr, nullptr, &value, sizeof(bool)); + setJemallocValue("prof.active", value); LOG_TRACE(getLogger("SystemJemalloc"), "Profiling is {}", value ? "enabled" : "disabled"); } @@ -84,6 +98,16 @@ std::string flushJemallocProfile(const std::string & file_prefix) return profile_dump_path; } +void setJemallocBackgroundThreads(bool enabled) +{ + setJemallocValue("background_thread", enabled); +} + +void setJemallocMaxBackgroundThreads(size_t max_threads) +{ + setJemallocValue("max_background_threads", max_threads); +} + } #endif diff --git a/src/Common/Jemalloc.h b/src/Common/Jemalloc.h index 80ff0f1a319..499a906fd3d 100644 --- a/src/Common/Jemalloc.h +++ b/src/Common/Jemalloc.h @@ -17,6 +17,10 @@ void setJemallocProfileActive(bool value); std::string flushJemallocProfile(const std::string & file_prefix); +void setJemallocBackgroundThreads(bool enabled); + +void setJemallocMaxBackgroundThreads(size_t max_threads); + } #endif diff --git a/src/Common/NamedCollections/NamedCollectionsFactory.cpp b/src/Common/NamedCollections/NamedCollectionsFactory.cpp index 14105a8651d..bc283d2be1a 100644 --- a/src/Common/NamedCollections/NamedCollectionsFactory.cpp +++ b/src/Common/NamedCollections/NamedCollectionsFactory.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include namespace DB @@ -235,7 +236,7 @@ bool NamedCollectionFactory::loadIfNot(std::lock_guard & lock) loadFromConfig(context->getConfigRef(), lock); loadFromSQL(lock); - if (metadata_storage->supportsPeriodicUpdate()) + if (metadata_storage->isReplicated()) { update_task = context->getSchedulePool().createTask("NamedCollectionsMetadataStorage", [this]{ updateFunc(); }); update_task->activate(); @@ -357,6 +358,13 @@ void NamedCollectionFactory::reloadFromSQL() add(std::move(collections), lock); } +bool NamedCollectionFactory::usesReplicatedStorage() +{ + std::lock_guard lock(mutex); + loadIfNot(lock); + return metadata_storage->isReplicated(); +} + void NamedCollectionFactory::updateFunc() { LOG_TRACE(log, "Named collections background updating thread started"); diff --git a/src/Common/NamedCollections/NamedCollectionsFactory.h b/src/Common/NamedCollections/NamedCollectionsFactory.h index 6ee5940e686..a0721ad8a50 100644 --- a/src/Common/NamedCollections/NamedCollectionsFactory.h +++ b/src/Common/NamedCollections/NamedCollectionsFactory.h @@ -34,6 +34,8 @@ public: void updateFromSQL(const ASTAlterNamedCollectionQuery & query); + bool usesReplicatedStorage(); + void loadIfNot(); void shutdown(); diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp index 32fdb25abd3..3c6561fa51e 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -67,7 +68,7 @@ public: virtual bool removeIfExists(const std::string & path) = 0; - virtual bool supportsPeriodicUpdate() const = 0; + virtual bool isReplicated() const = 0; virtual bool waitUpdate(size_t /* timeout */) { return false; } }; @@ -89,7 +90,7 @@ public: ~LocalStorage() override = default; - bool supportsPeriodicUpdate() const override { return false; } + bool isReplicated() const override { return false; } std::vector list() const override { @@ -221,7 +222,7 @@ public: ~ZooKeeperStorage() override = default; - bool supportsPeriodicUpdate() const override { return true; } + bool isReplicated() const override { return true; } /// Return true if children changed. bool waitUpdate(size_t timeout) override @@ -465,14 +466,14 @@ void NamedCollectionsMetadataStorage::writeCreateQuery(const ASTCreateNamedColle storage->write(getFileName(query.collection_name), serializeAST(*normalized_query), replace); } -bool NamedCollectionsMetadataStorage::supportsPeriodicUpdate() const +bool NamedCollectionsMetadataStorage::isReplicated() const { - return storage->supportsPeriodicUpdate(); + return storage->isReplicated(); } bool NamedCollectionsMetadataStorage::waitUpdate() { - if (!storage->supportsPeriodicUpdate()) + if (!storage->isReplicated()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Periodic updates are not supported"); const auto & config = Context::getGlobalContextInstance()->getConfigRef(); diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h index 3c089fe2fa2..c3468fbc468 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h @@ -30,7 +30,7 @@ public: /// Return true if update was made bool waitUpdate(); - bool supportsPeriodicUpdate() const; + bool isReplicated() const; private: class INamedCollectionsStorage; diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index 2359137012c..3d4de773a36 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -28,7 +28,6 @@ namespace ErrorCodes namespace ProfileEvents { - extern const Event DistributedConnectionFailTry; extern const Event DistributedConnectionFailAtAll; extern const Event DistributedConnectionSkipReadOnlyReplica; } @@ -285,7 +284,6 @@ PoolWithFailoverBase::getMany( else { LOG_WARNING(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message); - ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); shuffled_pool.error_count = std::min(max_error_cap, shuffled_pool.error_count + 1); diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d98373b6c55..871ba7cab8b 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -238,7 +238,12 @@ \ M(CannotRemoveEphemeralNode, "Number of times an error happened while trying to remove ephemeral node. This is not an issue, because our implementation of ZooKeeper library guarantee that the session will expire and the node will be removed.") \ \ - M(RegexpCreated, "Compiled regular expressions. Identical regular expressions compiled just once and cached forever.") \ + M(RegexpWithMultipleNeedlesCreated, "Regular expressions with multiple needles (VectorScan library) compiled.") \ + M(RegexpWithMultipleNeedlesGlobalCacheHit, "Number of times we fetched compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpWithMultipleNeedlesGlobalCacheMiss, "Number of times we failed to fetch compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from a local cache.") \ + M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from a local cache.") \ + \ M(ContextLock, "Number of times the lock of Context was acquired or tried to acquire. This is global lock.") \ M(ContextLockWaitMicroseconds, "Context lock wait time in microseconds") \ \ @@ -447,14 +452,18 @@ The server successfully detected this situation and will download merged part fr M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \ \ M(AzureGetObject, "Number of Azure API GetObject calls.") \ - M(AzureUploadPart, "Number of Azure blob storage API UploadPart calls") \ + M(AzureUpload, "Number of Azure blob storage API Upload calls") \ + M(AzureStageBlock, "Number of Azure blob storage API StageBlock calls") \ + M(AzureCommitBlockList, "Number of Azure blob storage API CommitBlockList calls") \ M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \ M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \ M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.") \ \ M(DiskAzureGetObject, "Number of Disk Azure API GetObject calls.") \ - M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \ + M(DiskAzureUpload, "Number of Disk Azure blob storage API Upload calls") \ + M(DiskAzureStageBlock, "Number of Disk Azure blob storage API StageBlock calls") \ + M(DiskAzureCommitBlockList, "Number of Disk Azure blob storage API CommitBlockList calls") \ M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \ M(DiskAzureListObjects, "Number of Disk Azure blob storage API ListObjects calls.") \ M(DiskAzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ @@ -499,6 +508,7 @@ The server successfully detected this situation and will download merged part fr M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \ M(FileSegmentFailToIncreasePriority, "Number of times the priority was not increased due to a high contention on the cache lock") \ M(FilesystemCacheFailToReserveSpaceBecauseOfLockContention, "Number of times space reservation was skipped due to a high contention on the cache lock") \ + M(FilesystemCacheFailToReserveSpaceBecauseOfCacheResize, "Number of times space reservation was skipped due to the cache is being resized") \ M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \ M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \ M(FilesystemCacheFreeSpaceKeepingThreadRun, "Number of times background thread executed free space keeping job") \ @@ -559,6 +569,7 @@ The server successfully detected this situation and will download merged part fr M(AggregationPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for aggregation.") \ M(AggregationHashTablesInitializedAsTwoLevel, "How many hash tables were inited as two-level for aggregation.") \ M(AggregationOptimizedEqualRangesOfKeys, "For how many blocks optimization of equal ranges of keys was applied") \ + M(HashJoinPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for hash join.") \ \ M(MetadataFromKeeperCacheHit, "Number of times an object storage metadata request was answered from cache without making request to Keeper") \ M(MetadataFromKeeperCacheMiss, "Number of times an object storage metadata request had to be answered from Keeper") \ @@ -611,6 +622,13 @@ The server successfully detected this situation and will download merged part fr M(KeeperPacketsReceived, "Packets received by keeper server") \ M(KeeperRequestTotal, "Total requests number on keeper server") \ M(KeeperLatency, "Keeper latency") \ + M(KeeperTotalElapsedMicroseconds, "Keeper total latency for a single request") \ + M(KeeperProcessElapsedMicroseconds, "Keeper commit latency for a single request") \ + M(KeeperPreprocessElapsedMicroseconds, "Keeper preprocessing latency for a single reuquest") \ + M(KeeperStorageLockWaitMicroseconds, "Time spent waiting for acquiring Keeper storage lock") \ + M(KeeperCommitWaitElapsedMicroseconds, "Time spent waiting for certain log to be committed") \ + M(KeeperBatchMaxCount, "Number of times the size of batch was limited by the amount") \ + M(KeeperBatchMaxTotalSize, "Number of times the size of batch was limited by the total bytes size") \ M(KeeperCommits, "Number of successful commits") \ M(KeeperCommitsFailed, "Number of failed commits") \ M(KeeperSnapshotCreations, "Number of snapshots creations")\ diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index 0b482cb09be..8f0fb3cac6c 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -1,6 +1,7 @@ #include "ProgressIndication.h" #include #include +#include #include #include #include diff --git a/src/Common/ProgressIndication.h b/src/Common/ProgressIndication.h index ae39fb49bcc..188ec4ed764 100644 --- a/src/Common/ProgressIndication.h +++ b/src/Common/ProgressIndication.h @@ -1,14 +1,15 @@ #pragma once -#include -#include -#include #include #include #include #include #include +#include +#include +#include +#include namespace DB { diff --git a/src/Client/QueryFuzzer.cpp b/src/Common/QueryFuzzer.cpp similarity index 97% rename from src/Client/QueryFuzzer.cpp rename to src/Common/QueryFuzzer.cpp index f5b700ea529..161c38f20e0 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Common/QueryFuzzer.cpp @@ -68,22 +68,21 @@ Field QueryFuzzer::getRandomField(int type) { case 0: { - return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) - / sizeof(*bad_int64_values))]; + return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)]; } case 1: { static constexpr double values[] = {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999, 1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20, - FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))]; + FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % std::size(values)]; } case 2: { static constexpr UInt64 scales[] = {0, 1, 2, 10}; return DecimalField( - bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))], - static_cast(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))]) + bad_int64_values[fuzz_rand() % std::size(bad_int64_values)], + static_cast(scales[fuzz_rand() % std::size(scales)]) ); } default: @@ -165,7 +164,8 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - std::cerr << "erased\n"; + if (debug_stream) + *debug_stream << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -174,12 +174,14 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - std::cerr << fmt::format("inserted (pos {})\n", pos); + if (debug_stream) + *debug_stream << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - std::cerr << "inserted (0)\n"; + if (debug_stream) + *debug_stream << "inserted (0)\n"; } } @@ -197,7 +199,9 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - std::cerr << "erased\n"; + + if (debug_stream) + *debug_stream << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -206,12 +210,16 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - std::cerr << fmt::format("inserted (pos {})\n", pos); + + if (debug_stream) + *debug_stream << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - std::cerr << "inserted (0)\n"; + + if (debug_stream) + *debug_stream << "inserted (0)\n"; } } @@ -344,7 +352,8 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) } else { - std::cerr << "No random column.\n"; + if (debug_stream) + *debug_stream << "No random column.\n"; } } @@ -378,7 +387,8 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) if (col) impl->children.insert(pos, col); else - std::cerr << "No random column.\n"; + if (debug_stream) + *debug_stream << "No random column.\n"; } // We don't have to recurse here to fuzz the children, this is handled by @@ -1361,11 +1371,15 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast) collectFuzzInfoMain(ast); fuzz(ast); - std::cout << std::endl; - WriteBufferFromOStream ast_buf(std::cout, 4096); - formatAST(*ast, ast_buf, false /*highlight*/); - ast_buf.finalize(); - std::cout << std::endl << std::endl; + if (out_stream) + { + *out_stream << std::endl; + + WriteBufferFromOStream ast_buf(*out_stream, 4096); + formatAST(*ast, ast_buf, false /*highlight*/); + ast_buf.finalize(); + *out_stream << std::endl << std::endl; + } } } diff --git a/src/Client/QueryFuzzer.h b/src/Common/QueryFuzzer.h similarity index 91% rename from src/Client/QueryFuzzer.h rename to src/Common/QueryFuzzer.h index 6165e589cae..35d088809f2 100644 --- a/src/Client/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -35,9 +35,31 @@ struct ASTWindowDefinition; * queries, so you want to feed it a lot of queries to get some interesting mix * of them. Normally we feed SQL regression tests to it. */ -struct QueryFuzzer +class QueryFuzzer { - pcg64 fuzz_rand{randomSeed()}; +public: + explicit QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) + : fuzz_rand(fuzz_rand_) + , out_stream(out_stream_) + , debug_stream(debug_stream_) + { + } + + // This is the only function you have to call -- it will modify the passed + // ASTPtr to point to new AST with some random changes. + void fuzzMain(ASTPtr & ast); + + ASTs getInsertQueriesForFuzzedTables(const String & full_query); + ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); + void notifyQueryFailed(ASTPtr ast); + + static bool isSuitableForFuzzing(const ASTCreateQuery & create); + +private: + pcg64 fuzz_rand; + + std::ostream * out_stream = nullptr; + std::ostream * debug_stream = nullptr; // We add elements to expression lists with fixed probability. Some elements // are so large, that the expected number of elements we add to them is @@ -66,10 +88,6 @@ struct QueryFuzzer std::unordered_map index_of_fuzzed_table; std::set created_tables_hashes; - // This is the only function you have to call -- it will modify the passed - // ASTPtr to point to new AST with some random changes. - void fuzzMain(ASTPtr & ast); - // Various helper functions follow, normally you shouldn't have to call them. Field getRandomField(int type); Field fuzzField(Field field); @@ -77,9 +95,6 @@ struct QueryFuzzer ASTPtr getRandomExpressionList(); DataTypePtr fuzzDataType(DataTypePtr type); DataTypePtr getRandomType(); - ASTs getInsertQueriesForFuzzedTables(const String & full_query); - ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); - void notifyQueryFailed(ASTPtr ast); void replaceWithColumnLike(ASTPtr & ast); void replaceWithTableLike(ASTPtr & ast); void fuzzOrderByElement(ASTOrderByElement * elem); @@ -102,8 +117,6 @@ struct QueryFuzzer void addTableLike(ASTPtr ast); void addColumnLike(ASTPtr ast); void collectFuzzInfoRecurse(ASTPtr ast); - - static bool isSuitableForFuzzing(const ASTCreateQuery & create); }; } diff --git a/src/Common/RemoteHostFilter.h b/src/Common/RemoteHostFilter.h index 2b91306f405..4c8983205fa 100644 --- a/src/Common/RemoteHostFilter.h +++ b/src/Common/RemoteHostFilter.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 239e957bdfe..34f6f0b7535 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -545,7 +545,7 @@ std::string StackTrace::toString() const return toStringCached(frame_pointers, offset, size); } -std::string StackTrace::toString(void ** frame_pointers_raw, size_t offset, size_t size) +std::string StackTrace::toString(void * const * frame_pointers_raw, size_t offset, size_t size) { __msan_unpoison(frame_pointers_raw, size * sizeof(*frame_pointers_raw)); diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index 4ce9a9281f3..2078828f3d7 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -59,7 +59,7 @@ public: const FramePointers & getFramePointers() const { return frame_pointers; } std::string toString() const; - static std::string toString(void ** frame_pointers, size_t offset, size_t size); + static std::string toString(void * const * frame_pointers, size_t offset, size_t size); static void dropCache(); /// @param fatal - if true, will process inline frames (slower) diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp index 7b96d1f075b..719ac9593ff 100644 --- a/src/Common/ThreadFuzzer.cpp +++ b/src/Common/ThreadFuzzer.cpp @@ -12,13 +12,14 @@ #include #include -#include +#include #include #include +#include +#include #include -#include #include "config.h" // USE_JEMALLOC diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 48bb510e589..dff14f74681 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -9,7 +9,6 @@ #include #include #include -#include namespace Coordination @@ -29,7 +28,7 @@ void ZooKeeperResponse::write(WriteBuffer & out) const Coordination::write(buf.str(), out); } -std::string ZooKeeperRequest::toString() const +std::string ZooKeeperRequest::toString(bool short_format) const { return fmt::format( "XID = {}\n" @@ -37,7 +36,7 @@ std::string ZooKeeperRequest::toString() const "Additional info:\n{}", xid, getOpNum(), - toStringImpl()); + toStringImpl(short_format)); } void ZooKeeperRequest::write(WriteBuffer & out) const @@ -60,7 +59,7 @@ void ZooKeeperSyncRequest::readImpl(ReadBuffer & in) Coordination::read(path, in); } -std::string ZooKeeperSyncRequest::toStringImpl() const +std::string ZooKeeperSyncRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -91,7 +90,7 @@ void ZooKeeperReconfigRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string ZooKeeperReconfigRequest::toStringImpl() const +std::string ZooKeeperReconfigRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "joining = {}\nleaving = {}\nnew_members = {}\nversion = {}", @@ -145,7 +144,7 @@ void ZooKeeperAuthRequest::readImpl(ReadBuffer & in) Coordination::read(data, in); } -std::string ZooKeeperAuthRequest::toStringImpl() const +std::string ZooKeeperAuthRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "type = {}\n" @@ -191,7 +190,7 @@ void ZooKeeperCreateRequest::readImpl(ReadBuffer & in) is_sequential = true; } -std::string ZooKeeperCreateRequest::toStringImpl() const +std::string ZooKeeperCreateRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -218,7 +217,7 @@ void ZooKeeperRemoveRequest::writeImpl(WriteBuffer & out) const Coordination::write(version, out); } -std::string ZooKeeperRemoveRequest::toStringImpl() const +std::string ZooKeeperRemoveRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -245,7 +244,7 @@ void ZooKeeperExistsRequest::readImpl(ReadBuffer & in) Coordination::read(has_watch, in); } -std::string ZooKeeperExistsRequest::toStringImpl() const +std::string ZooKeeperExistsRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -272,7 +271,7 @@ void ZooKeeperGetRequest::readImpl(ReadBuffer & in) Coordination::read(has_watch, in); } -std::string ZooKeeperGetRequest::toStringImpl() const +std::string ZooKeeperGetRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -303,7 +302,7 @@ void ZooKeeperSetRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string ZooKeeperSetRequest::toStringImpl() const +std::string ZooKeeperSetRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -334,7 +333,7 @@ void ZooKeeperListRequest::readImpl(ReadBuffer & in) Coordination::read(has_watch, in); } -std::string ZooKeeperListRequest::toStringImpl() const +std::string ZooKeeperListRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -356,7 +355,7 @@ void ZooKeeperFilteredListRequest::readImpl(ReadBuffer & in) list_request_type = static_cast(read_request_type); } -std::string ZooKeeperFilteredListRequest::toStringImpl() const +std::string ZooKeeperFilteredListRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -401,7 +400,7 @@ void ZooKeeperSetACLRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string ZooKeeperSetACLRequest::toStringImpl() const +std::string ZooKeeperSetACLRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}\nversion = {}", path, version); } @@ -426,7 +425,7 @@ void ZooKeeperGetACLRequest::writeImpl(WriteBuffer & out) const Coordination::write(path, out); } -std::string ZooKeeperGetACLRequest::toStringImpl() const +std::string ZooKeeperGetACLRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -455,7 +454,7 @@ void ZooKeeperCheckRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string ZooKeeperCheckRequest::toStringImpl() const +std::string ZooKeeperCheckRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}\nversion = {}", path, version); } @@ -600,8 +599,11 @@ void ZooKeeperMultiRequest::readImpl(ReadBuffer & in) } } -std::string ZooKeeperMultiRequest::toStringImpl() const +std::string ZooKeeperMultiRequest::toStringImpl(bool short_format) const { + if (short_format) + return fmt::format("Subrequests size = {}", requests.size()); + auto out = fmt::memory_buffer(); for (const auto & request : requests) { diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index 490c2dce4f8..fd6ec3cd375 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -63,12 +63,12 @@ struct ZooKeeperRequest : virtual Request /// Writes length, xid, op_num, then the rest. void write(WriteBuffer & out) const; - std::string toString() const; + std::string toString(bool short_format = false) const; virtual void writeImpl(WriteBuffer &) const = 0; virtual void readImpl(ReadBuffer &) = 0; - virtual std::string toStringImpl() const { return ""; } + virtual std::string toStringImpl(bool /*short_format*/) const { return ""; } static std::shared_ptr read(ReadBuffer & in); @@ -98,7 +98,7 @@ struct ZooKeeperSyncRequest final : ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Sync; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -123,7 +123,7 @@ struct ZooKeeperReconfigRequest final : ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Reconfig; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -176,7 +176,7 @@ struct ZooKeeperAuthRequest final : ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Auth; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -229,7 +229,7 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest OpNum getOpNum() const override { return not_exists ? OpNum::CreateIfNotExists : OpNum::Create; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -266,7 +266,7 @@ struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Remove; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -293,7 +293,7 @@ struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Exists; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -320,7 +320,7 @@ struct ZooKeeperGetRequest final : GetRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Get; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -347,7 +347,7 @@ struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Set; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -375,7 +375,7 @@ struct ZooKeeperListRequest : ListRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::List; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -395,7 +395,7 @@ struct ZooKeeperFilteredListRequest final : ZooKeeperListRequest OpNum getOpNum() const override { return OpNum::FilteredList; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; size_t bytesSize() const override { return ZooKeeperListRequest::bytesSize() + sizeof(list_request_type); } }; @@ -428,7 +428,7 @@ struct ZooKeeperCheckRequest : CheckRequest, ZooKeeperRequest OpNum getOpNum() const override { return not_exists ? OpNum::CheckNotExists : OpNum::Check; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -469,7 +469,7 @@ struct ZooKeeperSetACLRequest final : SetACLRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::SetACL; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -490,7 +490,7 @@ struct ZooKeeperGetACLRequest final : GetACLRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::GetACL; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -516,7 +516,7 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 8653af51308..2728f953bea 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -996,6 +996,10 @@ void ZooKeeper::receiveEvent() if (request_info.callback) request_info.callback(*response); + + /// Finalize current session if we receive a hardware error from ZooKeeper + if (err != Error::ZOK && isHardwareError(err)) + finalize(/*error_send*/ false, /*error_receive*/ true, fmt::format("Hardware error: {}", err)); } diff --git a/src/Common/assert_cast.h b/src/Common/assert_cast.h index 0b73ba1cc12..f9d0bf0e595 100644 --- a/src/Common/assert_cast.h +++ b/src/Common/assert_cast.h @@ -25,7 +25,7 @@ namespace DB template inline To assert_cast(From && from) { -#ifndef NDEBUG +#ifdef ABORT_ON_LOGICAL_ERROR try { if constexpr (std::is_pointer_v) diff --git a/src/Common/config.h.in b/src/Common/config.h.in index ad2ca2652d1..f68701d5d10 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -63,6 +63,7 @@ #cmakedefine01 USE_BCRYPT #cmakedefine01 USE_LIBARCHIVE #cmakedefine01 USE_POCKETFFT +#cmakedefine01 USE_PROMETHEUS_PROTOBUFS /// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO. /// That's why we use absolute paths. diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 410576c2b4a..c133e9f5617 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -31,8 +31,10 @@ target_link_libraries (arena_with_free_lists PRIVATE dbms) clickhouse_add_executable (lru_hash_map_perf lru_hash_map_perf.cpp) target_link_libraries (lru_hash_map_perf PRIVATE dbms) -clickhouse_add_executable (thread_creation_latency thread_creation_latency.cpp) -target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io) +if (OS_LINUX) + clickhouse_add_executable (thread_creation_latency thread_creation_latency.cpp) + target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io) +endif() clickhouse_add_executable (array_cache array_cache.cpp) target_link_libraries (array_cache PRIVATE clickhouse_common_io) diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h index bb83e0381ef..abeda95ed0d 100644 --- a/src/Common/formatIPv6.h +++ b/src/Common/formatIPv6.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Common/mysqlxx/Pool.cpp b/src/Common/mysqlxx/Pool.cpp index cc5b18214c8..546e9e91dc7 100644 --- a/src/Common/mysqlxx/Pool.cpp +++ b/src/Common/mysqlxx/Pool.cpp @@ -228,7 +228,6 @@ Pool::Entry Pool::tryGet() for (auto connection_it = connections.cbegin(); connection_it != connections.cend();) { Connection * connection_ptr = *connection_it; - /// Fixme: There is a race condition here b/c we do not synchronize with Pool::Entry's copy-assignment operator if (connection_ptr->ref_count == 0) { { diff --git a/src/Common/mysqlxx/mysqlxx/Pool.h b/src/Common/mysqlxx/mysqlxx/Pool.h index 6e509d8bdd6..f1ef81e28dd 100644 --- a/src/Common/mysqlxx/mysqlxx/Pool.h +++ b/src/Common/mysqlxx/mysqlxx/Pool.h @@ -64,17 +64,6 @@ public: decrementRefCount(); } - Entry & operator= (const Entry & src) /// NOLINT - { - pool = src.pool; - if (data) - decrementRefCount(); - data = src.data; - if (data) - incrementRefCount(); - return * this; - } - bool isNull() const { return data == nullptr; diff --git a/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp b/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp index 61d6a117285..121767edc84 100644 --- a/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp +++ b/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp @@ -13,13 +13,11 @@ mysqlxx::Pool::Entry getWithFailover(mysqlxx::Pool & connections_pool) constexpr size_t max_tries = 3; - mysqlxx::Pool::Entry worker_connection; - for (size_t try_no = 1; try_no <= max_tries; ++try_no) { try { - worker_connection = connections_pool.tryGet(); + mysqlxx::Pool::Entry worker_connection = connections_pool.tryGet(); if (!worker_connection.isNull()) { diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 68e0131c91b..2e7aa0d086f 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -185,7 +185,6 @@ void registerCodecDeflateQpl(CompressionCodecFactory & factory); /// Keeper use only general-purpose codecs, so we don't need these special codecs /// in standalone build -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerCodecDelta(CompressionCodecFactory & factory); void registerCodecT64(CompressionCodecFactory & factory); void registerCodecDoubleDelta(CompressionCodecFactory & factory); @@ -193,7 +192,6 @@ void registerCodecGorilla(CompressionCodecFactory & factory); void registerCodecEncrypted(CompressionCodecFactory & factory); void registerCodecFPC(CompressionCodecFactory & factory); void registerCodecGCD(CompressionCodecFactory & factory); -#endif CompressionCodecFactory::CompressionCodecFactory() { @@ -205,7 +203,6 @@ CompressionCodecFactory::CompressionCodecFactory() #endif registerCodecLZ4HC(*this); registerCodecMultiple(*this); -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerCodecDelta(*this); registerCodecT64(*this); registerCodecDoubleDelta(*this); @@ -216,7 +213,6 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecDeflateQpl(*this); #endif registerCodecGCD(*this); -#endif default_codec = get("LZ4", {}); } diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 2e8dbe75e90..0f833c17e1b 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -5,6 +5,8 @@ #include #include +#include +#include #include #include diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index 05f691ca76b..d72d39fd7e1 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -169,6 +169,23 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const writeText("async_replication=", buf); write_bool(coordination_settings->async_replication); + + writeText("latest_logs_cache_size_threshold=", buf); + write_int(coordination_settings->latest_logs_cache_size_threshold); + writeText("commit_logs_cache_size_threshold=", buf); + write_int(coordination_settings->commit_logs_cache_size_threshold); + + writeText("disk_move_retries_wait_ms=", buf); + write_int(coordination_settings->disk_move_retries_wait_ms); + writeText("disk_move_retries_during_init=", buf); + write_int(coordination_settings->disk_move_retries_during_init); + + writeText("log_slow_total_threshold_ms=", buf); + write_int(coordination_settings->log_slow_total_threshold_ms); + writeText("log_slow_cpu_threshold_ms=", buf); + write_int(coordination_settings->log_slow_cpu_threshold_ms); + writeText("log_slow_connection_operation_threshold_ms=", buf); + write_int(coordination_settings->log_slow_connection_operation_threshold_ms); } KeeperConfigurationAndSettingsPtr diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index a32552616ee..6e23a56ef97 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -58,7 +58,10 @@ struct Settings; M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \ M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \ M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \ - M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) + M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \ + M(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this settings will be logged", 0) \ + M(UInt64, log_slow_cpu_threshold_ms, 100, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \ + M(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0) DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 8de9f8dfa1c..d0ecbe744a0 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 82b30a0b5f6..e3289982b0d 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -2,8 +2,11 @@ #include "config.h" +#include +#include #include #include +#include #include namespace DB diff --git a/src/Coordination/KeeperConstants.cpp b/src/Coordination/KeeperConstants.cpp index 51bf037c1c9..7589e3393be 100644 --- a/src/Coordination/KeeperConstants.cpp +++ b/src/Coordination/KeeperConstants.cpp @@ -150,12 +150,18 @@ M(S3PutObject) \ M(S3GetObject) \ \ - M(AzureUploadPart) \ - M(DiskAzureUploadPart) \ + M(AzureUpload) \ + M(DiskAzureUpload) \ + M(AzureStageBlock) \ + M(DiskAzureStageBlock) \ + M(AzureCommitBlockList) \ + M(DiskAzureCommitBlockList) \ M(AzureCopyObject) \ M(DiskAzureCopyObject) \ M(AzureDeleteObjects) \ + M(DiskAzureDeleteObjects) \ M(AzureListObjects) \ + M(DiskAzureListObjects) \ \ M(DiskS3DeleteObjects) \ M(DiskS3CopyObject) \ @@ -238,6 +244,13 @@ M(KeeperPacketsReceived) \ M(KeeperRequestTotal) \ M(KeeperLatency) \ + M(KeeperTotalElapsedMicroseconds) \ + M(KeeperProcessElapsedMicroseconds) \ + M(KeeperPreprocessElapsedMicroseconds) \ + M(KeeperStorageLockWaitMicroseconds) \ + M(KeeperCommitWaitElapsedMicroseconds) \ + M(KeeperBatchMaxCount) \ + M(KeeperBatchMaxTotalSize) \ M(KeeperCommits) \ M(KeeperCommitsFailed) \ M(KeeperSnapshotCreations) \ @@ -359,7 +372,7 @@ extern const std::vector keeper_profile_events M(AsynchronousReadWait) \ M(S3Requests) \ M(KeeperAliveConnections) \ - M(KeeperOutstandingRequets) \ + M(KeeperOutstandingRequests) \ M(ThreadsInOvercommitTracker) \ M(IOUringPendingEvents) \ M(IOUringInFlightEvents) \ diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index a36a074ce89..4eab03e3c2c 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -3,6 +3,7 @@ #include +#include #include #include #include diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index b4389da082d..8c7e6405153 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -5,7 +5,8 @@ #include #include -#include "Common/ZooKeeper/IKeeper.h" +#include +#include #include #include #include @@ -13,6 +14,7 @@ #include #include #include +#include #include @@ -28,7 +30,14 @@ namespace CurrentMetrics { extern const Metric KeeperAliveConnections; - extern const Metric KeeperOutstandingRequets; + extern const Metric KeeperOutstandingRequests; +} + +namespace ProfileEvents +{ + extern const Event KeeperCommitWaitElapsedMicroseconds; + extern const Event KeeperBatchMaxCount; + extern const Event KeeperBatchMaxTotalSize; } using namespace std::chrono_literals; @@ -119,6 +128,7 @@ void KeeperDispatcher::requestThread() auto coordination_settings = configuration_and_settings->coordination_settings; uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds(); uint64_t max_batch_bytes_size = coordination_settings->max_requests_batch_bytes_size; + size_t max_batch_size = coordination_settings->max_requests_batch_size; /// The code below do a very simple thing: batch all write (quorum) requests into vector until /// previous write batch is not finished or max_batch size achieved. The main complexity goes from @@ -131,7 +141,7 @@ void KeeperDispatcher::requestThread() { if (requests_queue->tryPop(request, max_wait)) { - CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests); if (shutdown_called) break; @@ -163,7 +173,7 @@ void KeeperDispatcher::requestThread() /// Trying to get batch requests as fast as possible if (requests_queue->tryPop(request)) { - CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests); /// Don't append read request into batch, we have to process them separately if (!coordination_settings->quorum_reads && request.request->isReadRequest()) { @@ -188,7 +198,6 @@ void KeeperDispatcher::requestThread() return false; }; - size_t max_batch_size = coordination_settings->max_requests_batch_size; while (!shutdown_called && current_batch.size() < max_batch_size && !has_reconfig_request && current_batch_bytes_size < max_batch_bytes_size && try_get_request()) ; @@ -225,6 +234,12 @@ void KeeperDispatcher::requestThread() /// Process collected write requests batch if (!current_batch.empty()) { + if (current_batch.size() == max_batch_size) + ProfileEvents::increment(ProfileEvents::KeeperBatchMaxCount, 1); + + if (current_batch_bytes_size == max_batch_bytes_size) + ProfileEvents::increment(ProfileEvents::KeeperBatchMaxTotalSize, 1); + LOG_TRACE(log, "Processing requests batch, size: {}, bytes: {}", current_batch.size(), current_batch_bytes_size); auto result = server->putRequestBatch(current_batch); @@ -243,6 +258,8 @@ void KeeperDispatcher::requestThread() /// If we will execute read or reconfig next, we have to process result now if (execute_requests_after_write) { + Stopwatch watch; + SCOPE_EXIT(ProfileEvents::increment(ProfileEvents::KeeperCommitWaitElapsedMicroseconds, watch.elapsedMicroseconds())); if (prev_result) result_buf = forceWaitAndProcessResult( prev_result, prev_batch, /*clear_requests_on_success=*/!execute_requests_after_write); @@ -305,7 +322,7 @@ void KeeperDispatcher::responseThread() try { - setResponse(response_for_session.session_id, response_for_session.response); + setResponse(response_for_session.session_id, response_for_session.response, response_for_session.request); } catch (...) { @@ -319,19 +336,13 @@ void KeeperDispatcher::snapshotThread() { setThreadName("KeeperSnpT"); const auto & shutdown_called = keeper_context->isShutdownCalled(); - while (!shutdown_called) + CreateSnapshotTask task; + while (snapshots_queue.pop(task)) { - CreateSnapshotTask task; - if (!snapshots_queue.pop(task)) - break; - try { auto snapshot_file_info = task.create_snapshot(std::move(task.snapshot), /*execute_only_cleanup=*/shutdown_called); - if (shutdown_called) - break; - if (!snapshot_file_info) continue; @@ -346,7 +357,7 @@ void KeeperDispatcher::snapshotThread() } } -void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response) +void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response, Coordination::ZooKeeperRequestPtr request) { std::lock_guard lock(session_to_response_callback_mutex); @@ -360,7 +371,7 @@ void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKe return; auto callback = new_session_id_response_callback[session_id_resp.internal_id]; - callback(response); + callback(response, request); new_session_id_response_callback.erase(session_id_resp.internal_id); } else /// Normal response, just write to client @@ -371,7 +382,7 @@ void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKe if (session_response_callback == session_to_response_callback.end()) return; - session_response_callback->second(response); + session_response_callback->second(response, request); /// Session closed, no more writes if (response->xid != Coordination::WATCH_XID && response->getOpNum() == Coordination::OpNum::Close) @@ -410,7 +421,7 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ { throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot push request to queue within operation timeout"); } - CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests); return true; } @@ -534,7 +545,7 @@ void KeeperDispatcher::shutdown() /// Set session expired for all pending requests while (requests_queue && requests_queue->tryPop(request_for_session)) { - CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests); auto response = request_for_session.request->makeResponse(); response->error = Coordination::Error::ZSESSIONEXPIRED; setResponse(request_for_session.session_id, response); @@ -661,7 +672,7 @@ void KeeperDispatcher::sessionCleanerTask() }; if (!requests_queue->push(std::move(request_info))) LOG_INFO(log, "Cannot push close request to queue while cleaning outdated sessions"); - CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests); /// Remove session from registered sessions finishSession(dead_session); @@ -762,21 +773,27 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) { std::lock_guard lock(session_to_response_callback_mutex); - new_session_id_response_callback[request->internal_id] = [promise, internal_id = request->internal_id] (const Coordination::ZooKeeperResponsePtr & response) + new_session_id_response_callback[request->internal_id] + = [promise, internal_id = request->internal_id]( + const Coordination::ZooKeeperResponsePtr & response, Coordination::ZooKeeperRequestPtr /*request*/) { if (response->getOpNum() != Coordination::OpNum::SessionID) - promise->set_exception(std::make_exception_ptr(Exception(ErrorCodes::LOGICAL_ERROR, - "Incorrect response of type {} instead of SessionID response", response->getOpNum()))); + promise->set_exception(std::make_exception_ptr(Exception( + ErrorCodes::LOGICAL_ERROR, "Incorrect response of type {} instead of SessionID response", response->getOpNum()))); auto session_id_response = dynamic_cast(*response); if (session_id_response.internal_id != internal_id) { - promise->set_exception(std::make_exception_ptr(Exception(ErrorCodes::LOGICAL_ERROR, - "Incorrect response with internal id {} instead of {}", session_id_response.internal_id, internal_id))); + promise->set_exception(std::make_exception_ptr(Exception( + ErrorCodes::LOGICAL_ERROR, + "Incorrect response with internal id {} instead of {}", + session_id_response.internal_id, + internal_id))); } if (response->error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException::fromMessage(response->error, "SessionID request failed with error"))); + promise->set_exception( + std::make_exception_ptr(zkutil::KeeperException::fromMessage(response->error, "SessionID request failed with error"))); promise->set_value(session_id_response.session_id); }; @@ -785,7 +802,7 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) /// Push new session request to queue if (!requests_queue->tryPush(std::move(request_info), session_timeout_ms)) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot push session id request to queue within session timeout"); - CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests); if (future.wait_for(std::chrono::milliseconds(session_timeout_ms)) != std::future_status::ready) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot receive session id within session timeout"); diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 2e0c73131d5..41fb8451ee4 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -20,7 +19,7 @@ namespace DB { -using ZooKeeperResponseCallback = std::function; +using ZooKeeperResponseCallback = std::function; /// Highlevel wrapper for ClickHouse Keeper. /// Process user requests via consensus and return responses. @@ -92,7 +91,7 @@ private: void clusterUpdateWithReconfigDisabledThread(); void clusterUpdateThread(); - void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response); + void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response, Coordination::ZooKeeperRequestPtr request = nullptr); /// Add error responses for requests to responses queue. /// Clears requests. diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 09a1132e96d..dc9658e895f 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -3,9 +3,7 @@ #include "config.h" -#include -#include -#include +#include #include #include #include @@ -30,6 +28,10 @@ #include #include +#include +#include +#include + #pragma clang diagnostic ignored "-Wdeprecated-declarations" #include diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 5e45a552cba..2eb630b4dd6 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -17,6 +16,9 @@ namespace DB using RaftAppendResult = nuraft::ptr>>; +struct KeeperConfigurationAndSettings; +using KeeperConfigurationAndSettingsPtr = std::shared_ptr; + class KeeperServer { private: diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index e4d661dfe17..d9b9ba528ee 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -1,12 +1,15 @@ #include #include +#include +#include +#include +#include #include #include -#include #include -#include #include #include +#include #include #include #include @@ -17,7 +20,6 @@ #include #include #include -#include namespace ProfileEvents @@ -31,6 +33,7 @@ namespace ProfileEvents extern const Event KeeperSnapshotApplysFailed; extern const Event KeeperReadSnapshot; extern const Event KeeperSaveSnapshot; + extern const Event KeeperStorageLockWaitMicroseconds; } namespace DB @@ -151,6 +154,20 @@ void assertDigest( } } +struct TSA_SCOPED_LOCKABLE LockGuardWithStats final +{ + std::unique_lock lock; + explicit LockGuardWithStats(std::mutex & mutex) TSA_ACQUIRE(mutex) + { + Stopwatch watch; + std::unique_lock l(mutex); + ProfileEvents::increment(ProfileEvents::KeeperStorageLockWaitMicroseconds, watch.elapsedMicroseconds()); + lock = std::move(l); + } + + ~LockGuardWithStats() TSA_RELEASE() = default; +}; + } nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data) @@ -272,7 +289,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig) return true; - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); if (storage->isFinalized()) return false; @@ -302,7 +319,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& request_for_session) { - std::lock_guard _(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); KeeperStorage::ResponseForSession response = processReconfiguration(request_for_session); if (!responses_queue.push(response)) { @@ -391,7 +408,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n if (!keeper_context->localLogsPreprocessed() && !preprocess(*request_for_session)) return nullptr; - auto try_push = [this](const KeeperStorage::ResponseForSession& response) + auto try_push = [&](const KeeperStorage::ResponseForSession & response) { if (!responses_queue.push(response)) { @@ -416,8 +433,9 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n KeeperStorage::ResponseForSession response_for_session; response_for_session.session_id = -1; response_for_session.response = response; + response_for_session.request = request_for_session->request; - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); session_id = storage->getSessionID(session_id_request.session_timeout_ms); LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms); response->session_id = session_id; @@ -426,16 +444,23 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n else { if (op_num == Coordination::OpNum::Close) + { std::lock_guard lock(request_cache_mutex); parsed_request_cache.erase(request_for_session->session_id); } - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); KeeperStorage::ResponsesForSessions responses_for_sessions = storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid); + for (auto & response_for_session : responses_for_sessions) + { + if (response_for_session.response->xid != Coordination::WATCH_XID) + response_for_session.request = request_for_session->request; + try_push(response_for_session); + } if (keeper_context->digestEnabled() && request_for_session->digest) assertDigest(*request_for_session->digest, storage->getNodesDigest(true), *request_for_session->request, request_for_session->log_idx, true); @@ -482,7 +507,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) } { /// deserialize and apply snapshot to storage - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); SnapshotDeserializationResult snapshot_deserialization_result; if (latest_snapshot_ptr) @@ -534,7 +559,7 @@ void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) return; - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); storage->rollbackRequest(request_for_session.zxid, allow_missing); } @@ -561,7 +586,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf); CreateSnapshotTask snapshot_task; { /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking. - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); snapshot_task.snapshot = std::make_shared(storage.get(), snapshot_meta_copy, getClusterConfig()); } @@ -569,7 +594,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot, bool execute_only_cleanup) { nuraft::ptr exception(nullptr); - bool ret = true; + bool ret = false; if (!execute_only_cleanup) { try @@ -599,7 +624,8 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res else { auto snapshot_buf = snapshot_manager.serializeSnapshotToBuffer(*snapshot); - auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); + auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk( + *snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); latest_snapshot_info = std::move(snapshot_info); latest_snapshot_buf = std::move(snapshot_buf); } @@ -612,18 +638,19 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res latest_snapshot_info->path); } } + + ret = true; } catch (...) { ProfileEvents::increment(ProfileEvents::KeeperSnapshotCreationsFailed); LOG_TRACE(log, "Exception happened during snapshot"); tryLogCurrentException(log); - ret = false; } } { /// Destroy snapshot with lock - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); LOG_TRACE(log, "Clearing garbage after snapshot"); /// Turn off "snapshot mode" and clear outdate part of storage state storage->clearGarbageAfterSnapshot(); @@ -764,107 +791,112 @@ int KeeperStateMachine::read_logical_snp_obj( void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session) { /// Pure local request, just process it with storage - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); auto responses = storage->processRequest( request_for_session.request, request_for_session.session_id, std::nullopt, true /*check_acl*/, true /*is_local*/); - for (const auto & response : responses) - if (!responses_queue.push(response)) - LOG_WARNING(log, "Failed to push response with session id {} to the queue, probably because of shutdown", response.session_id); + + for (auto & response_for_session : responses) + { + if (response_for_session.response->xid != Coordination::WATCH_XID) + response_for_session.request = request_for_session.request; + if (!responses_queue.push(response_for_session)) + LOG_WARNING(log, "Failed to push response with session id {} to the queue, probably because of shutdown", response_for_session.session_id); + } } void KeeperStateMachine::shutdownStorage() { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); storage->finalize(); } std::vector KeeperStateMachine::getDeadSessions() { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getDeadSessions(); } int64_t KeeperStateMachine::getNextZxid() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getNextZXID(); } KeeperStorage::Digest KeeperStateMachine::getNodesDigest() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getNodesDigest(false); } uint64_t KeeperStateMachine::getLastProcessedZxid() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getZXID(); } uint64_t KeeperStateMachine::getNodesCount() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getNodesCount(); } uint64_t KeeperStateMachine::getTotalWatchesCount() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getTotalWatchesCount(); } uint64_t KeeperStateMachine::getWatchedPathsCount() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getWatchedPathsCount(); } uint64_t KeeperStateMachine::getSessionsWithWatchesCount() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getSessionsWithWatchesCount(); } uint64_t KeeperStateMachine::getTotalEphemeralNodesCount() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getTotalEphemeralNodesCount(); } uint64_t KeeperStateMachine::getSessionWithEphemeralNodesCount() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getSessionWithEphemeralNodesCount(); } void KeeperStateMachine::dumpWatches(WriteBufferFromOwnString & buf) const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); storage->dumpWatches(buf); } void KeeperStateMachine::dumpWatchesByPath(WriteBufferFromOwnString & buf) const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); storage->dumpWatchesByPath(buf); } void KeeperStateMachine::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); storage->dumpSessionsAndEphemerals(buf); } uint64_t KeeperStateMachine::getApproximateDataSize() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getApproximateDataSize(); } uint64_t KeeperStateMachine::getKeyArenaSize() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getArenaDataSize(); } @@ -905,7 +937,7 @@ ClusterConfigPtr KeeperStateMachine::getClusterConfig() const void KeeperStateMachine::recalculateStorageStats() { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); LOG_INFO(log, "Recalculating storage stats"); storage->recalculateStats(); LOG_INFO(log, "Done recalculating storage stats"); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index ee6109f0a17..6357fd170df 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -182,8 +181,7 @@ private: KeeperSnapshotManagerS3 * snapshot_manager_s3; - KeeperStorage::ResponseForSession processReconfiguration( - const KeeperStorage::RequestForSession& request_for_session) + KeeperStorage::ResponseForSession processReconfiguration(const KeeperStorage::RequestForSession & request_for_session) TSA_REQUIRES(storage_and_responses_lock); }; } diff --git a/src/Coordination/KeeperStateManager.h b/src/Coordination/KeeperStateManager.h index 60f6dbe7b62..e5ab5fb4807 100644 --- a/src/Coordination/KeeperStateManager.h +++ b/src/Coordination/KeeperStateManager.h @@ -1,14 +1,13 @@ #pragma once -#include #include #include -#include +#include +#include #include #include #include "Coordination/KeeperStateMachine.h" #include "Coordination/RaftServerConfig.h" -#include namespace DB { diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index d6225baaf4c..efb57ae96e2 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -20,11 +20,12 @@ #include #include +#include #include #include +#include #include #include -#include #include #include @@ -40,6 +41,8 @@ namespace ProfileEvents extern const Event KeeperGetRequest; extern const Event KeeperListRequest; extern const Event KeeperExistsRequest; + extern const Event KeeperPreprocessElapsedMicroseconds; + extern const Event KeeperProcessElapsedMicroseconds; } namespace DB @@ -2309,6 +2312,20 @@ void KeeperStorage::preprocessRequest( std::optional digest, int64_t log_idx) { + Stopwatch watch; + SCOPE_EXIT({ + auto elapsed = watch.elapsedMicroseconds(); + if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()->log_slow_cpu_threshold_ms) + { + LOG_INFO( + getLogger("KeeperStorage"), + "Preprocessing a request took too long ({}ms).\nRequest info: {}", + elapsed_ms, + zk_request->toString(/*short_format=*/true)); + } + ProfileEvents::increment(ProfileEvents::KeeperPreprocessElapsedMicroseconds, elapsed); + }); + if (!initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes are not initialized"); @@ -2409,6 +2426,20 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( bool check_acl, bool is_local) { + Stopwatch watch; + SCOPE_EXIT({ + auto elapsed = watch.elapsedMicroseconds(); + if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()->log_slow_cpu_threshold_ms) + { + LOG_INFO( + getLogger("KeeperStorage"), + "Processing a request took too long ({}ms).\nRequest info: {}", + elapsed_ms, + zk_request->toString(/*short_format=*/true)); + } + ProfileEvents::increment(ProfileEvents::KeeperProcessElapsedMicroseconds, elapsed); + }); + if (!initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes are not initialized"); diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index d5e9a64e69c..f7812ad8877 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -206,6 +206,7 @@ public: { int64_t session_id; Coordination::ZooKeeperResponsePtr response; + Coordination::ZooKeeperRequestPtr request = nullptr; }; using ResponsesForSessions = std::vector; diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h index 70858930115..5f2b14e17b0 100644 --- a/src/Coordination/SnapshotableHashTable.h +++ b/src/Coordination/SnapshotableHashTable.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB { diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp deleted file mode 100644 index 2017adcc58d..00000000000 --- a/src/Coordination/Standalone/Context.cpp +++ /dev/null @@ -1,486 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include - -namespace ProfileEvents -{ - extern const Event ContextLock; - extern const Event ContextLockWaitMicroseconds; -} - -namespace CurrentMetrics -{ - extern const Metric ContextLockWait; - extern const Metric BackgroundSchedulePoolTask; - extern const Metric BackgroundSchedulePoolSize; - extern const Metric IOWriterThreads; - extern const Metric IOWriterThreadsActive; - extern const Metric IOWriterThreadsScheduled; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int UNSUPPORTED_METHOD; -} - -struct ContextSharedPart : boost::noncopyable -{ - ContextSharedPart() - : macros(std::make_unique()) - {} - - ~ContextSharedPart() - { - if (keeper_dispatcher) - { - try - { - keeper_dispatcher->shutdown(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - /// Wait for thread pool for background reads and writes, - /// since it may use per-user MemoryTracker which will be destroyed here. - if (asynchronous_remote_fs_reader) - { - try - { - asynchronous_remote_fs_reader->wait(); - asynchronous_remote_fs_reader.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (asynchronous_local_fs_reader) - { - try - { - asynchronous_local_fs_reader->wait(); - asynchronous_local_fs_reader.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (synchronous_local_fs_reader) - { - try - { - synchronous_local_fs_reader->wait(); - synchronous_local_fs_reader.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (threadpool_writer) - { - try - { - threadpool_writer->wait(); - threadpool_writer.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - } - - /// For access of most of shared objects. - mutable SharedMutex mutex; - - ServerSettings server_settings; - - String path; /// Path to the data directory, with a slash at the end. - ConfigurationPtr config; /// Global configuration settings. - MultiVersion macros; /// Substitutions extracted from config. - OnceFlag schedule_pool_initialized; - mutable std::unique_ptr schedule_pool; /// A thread pool that can run different jobs in background - RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml - - mutable OnceFlag readers_initialized; - mutable std::unique_ptr asynchronous_remote_fs_reader; - mutable std::unique_ptr asynchronous_local_fs_reader; - mutable std::unique_ptr synchronous_local_fs_reader; - -#if USE_LIBURING - mutable OnceFlag io_uring_reader_initialized; - mutable std::unique_ptr io_uring_reader; -#endif - - mutable OnceFlag threadpool_writer_initialized; - mutable std::unique_ptr threadpool_writer; - - mutable ThrottlerPtr remote_read_throttler; /// A server-wide throttler for remote IO reads - mutable ThrottlerPtr remote_write_throttler; /// A server-wide throttler for remote IO writes - - mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads - mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes - - std::optional storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage - - mutable std::mutex keeper_dispatcher_mutex; - mutable std::shared_ptr keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex); -}; - -ContextData::ContextData() = default; -ContextData::ContextData(const ContextData &) = default; - -Context::Context() = default; -Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this(rhs) {} -Context::~Context() = default; - -SharedContextHolder::SharedContextHolder(SharedContextHolder &&) noexcept = default; -SharedContextHolder & SharedContextHolder::operator=(SharedContextHolder &&) noexcept = default; -SharedContextHolder::SharedContextHolder() = default; -SharedContextHolder::~SharedContextHolder() = default; -SharedContextHolder::SharedContextHolder(std::unique_ptr shared_context) - : shared(std::move(shared_context)) {} - -void SharedContextHolder::reset() { shared.reset(); } - -void Context::makeGlobalContext() -{ - initGlobal(); - global_context = shared_from_this(); -} - -ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part) -{ - auto res = std::shared_ptr(new Context); - res->shared = shared_part; - return res; -} - -void Context::initGlobal() -{ - assert(!global_context_instance); - global_context_instance = shared_from_this(); -} - -SharedContextHolder Context::createShared() -{ - return SharedContextHolder(std::make_unique()); -} - - -ContextMutablePtr Context::getGlobalContext() const -{ - auto ptr = global_context.lock(); - if (!ptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no global context or global context has expired"); - return ptr; -} - -std::unique_lock Context::getGlobalLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::unique_lock(shared->mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::shared_lock Context::getGlobalSharedLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::shared_lock(shared->mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::unique_lock Context::getLocalLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::unique_lock(mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::shared_lock Context::getLocalSharedLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::shared_lock(mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -String Context::getPath() const -{ - auto lock = getGlobalSharedLock(); - return shared->path; -} - -void Context::setPath(const String & path) -{ - auto lock = getGlobalLock(); - shared->path = path; -} - -MultiVersion::Version Context::getMacros() const -{ - return shared->macros.get(); -} - -void Context::setMacros(std::unique_ptr && macros) -{ - shared->macros.set(std::move(macros)); -} - -BackgroundSchedulePool & Context::getSchedulePool() const -{ - callOnce(shared->schedule_pool_initialized, [&] { - shared->schedule_pool = std::make_unique( - shared->server_settings.background_schedule_pool_size, - CurrentMetrics::BackgroundSchedulePoolTask, - CurrentMetrics::BackgroundSchedulePoolSize, - "BgSchPool"); - }); - - return *shared->schedule_pool; -} - -void Context::setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config) -{ - shared->remote_host_filter.setValuesFromConfig(config); -} - -const RemoteHostFilter & Context::getRemoteHostFilter() const -{ - return shared->remote_host_filter; -} - -IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const -{ - callOnce(shared->readers_initialized, [&] { - const auto & config = getConfigRef(); - shared->asynchronous_remote_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER, config); - shared->asynchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER, config); - shared->synchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER, config); - }); - - switch (type) - { - case FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER: - return *shared->asynchronous_remote_fs_reader; - case FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER: - return *shared->asynchronous_local_fs_reader; - case FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER: - return *shared->synchronous_local_fs_reader; - } -} - -#if USE_LIBURING -IOUringReader & Context::getIOUringReader() const -{ - callOnce(shared->io_uring_reader_initialized, [&] { - shared->io_uring_reader = createIOUringReader(); - }); - - return *shared->io_uring_reader; -} -#endif - -std::shared_ptr Context::getFilesystemCacheLog() const -{ - return nullptr; -} - -std::shared_ptr Context::getFilesystemReadPrefetchesLog() const -{ - return nullptr; -} - -std::shared_ptr Context::getBlobStorageLog() const -{ - return nullptr; -} - -void Context::setConfig(const ConfigurationPtr & config) -{ - auto lock = getGlobalLock(); - shared->config = config; -} - -const Poco::Util::AbstractConfiguration & Context::getConfigRef() const -{ - auto lock = getGlobalSharedLock(); - return shared->config ? *shared->config : Poco::Util::Application::instance().config(); -} - -std::shared_ptr Context::getAsyncReadCounters() const -{ - auto lock = getLocalLock(); - if (!async_read_counters) - async_read_counters = std::make_shared(); - return async_read_counters; -} - -ThreadPool & Context::getThreadPoolWriter() const -{ - callOnce(shared->threadpool_writer_initialized, [&] { - const auto & config = getConfigRef(); - auto pool_size = config.getUInt(".threadpool_writer_pool_size", 100); - auto queue_size = config.getUInt(".threadpool_writer_queue_size", 1000000); - - shared->threadpool_writer = std::make_unique( - CurrentMetrics::IOWriterThreads, CurrentMetrics::IOWriterThreadsActive, CurrentMetrics::IOWriterThreadsScheduled, pool_size, pool_size, queue_size); - }); - - return *shared->threadpool_writer; -} - -ThrottlerPtr Context::getRemoteReadThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getRemoteWriteThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getLocalReadThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getLocalWriteThrottler() const -{ - return nullptr; -} - -ReadSettings Context::getReadSettings() const -{ - return ReadSettings{}; -} - -ResourceManagerPtr Context::getResourceManager() const -{ - return nullptr; -} - -ClassifierPtr Context::getWorkloadClassifier() const -{ - return nullptr; -} - -void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const -{ - const auto & config_ref = getConfigRef(); - - std::lock_guard lock(shared->keeper_dispatcher_mutex); - - if (shared->keeper_dispatcher) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize Keeper multiple times"); - - if (config_ref.has("keeper_server")) - { - shared->keeper_dispatcher = std::make_shared(); - shared->keeper_dispatcher->initialize(config_ref, true, start_async, getMacros()); - } -} - -std::shared_ptr Context::getKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (!shared->keeper_dispatcher) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Keeper must be initialized before requests"); - - return shared->keeper_dispatcher; -} - -std::shared_ptr Context::tryGetKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - return shared->keeper_dispatcher; -} - -void Context::shutdownKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (shared->keeper_dispatcher) - { - shared->keeper_dispatcher->shutdown(); - shared->keeper_dispatcher.reset(); - } -} - -void Context::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::AbstractConfiguration & config_) -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (!shared->keeper_dispatcher) - return; - - shared->keeper_dispatcher->updateConfiguration(config_, getMacros()); -} - -std::shared_ptr Context::getZooKeeper() const -{ - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper"); -} - -const S3SettingsByEndpoint & Context::getStorageS3Settings() const -{ - std::lock_guard lock(shared->mutex); - - if (!shared->storage_s3_settings) - { - const auto & config = shared->config ? *shared->config : Poco::Util::Application::instance().config(); - shared->storage_s3_settings.emplace().loadFromConfig(config, "s3", getSettingsRef()); - } - - return *shared->storage_s3_settings; -} - -const ServerSettings & Context::getServerSettings() const -{ - return shared->server_settings; -} - -bool Context::hasTraceCollector() const -{ - return false; -} - -bool Context::isBackgroundOperationContext() const -{ - return false; -} - -} diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h deleted file mode 100644 index d3bbfececed..00000000000 --- a/src/Coordination/Standalone/Context.h +++ /dev/null @@ -1,178 +0,0 @@ -#pragma once - -#include - -#include - -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include - -#include - -#include - -#include "config.h" -namespace zkutil -{ - class ZooKeeper; - using ZooKeeperPtr = std::shared_ptr; -} - -namespace DB -{ - -struct ContextSharedPart; -class Macros; -class FilesystemCacheLog; -class FilesystemReadPrefetchesLog; -class BlobStorageLog; -class IOUringReader; -class S3SettingsByEndpoint; - -/// A small class which owns ContextShared. -/// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete. -struct SharedContextHolder -{ - ~SharedContextHolder(); - SharedContextHolder(); - explicit SharedContextHolder(std::unique_ptr shared_context); - SharedContextHolder(SharedContextHolder &&) noexcept; - - SharedContextHolder & operator=(SharedContextHolder &&) noexcept; - - ContextSharedPart * get() const { return shared.get(); } - void reset(); -private: - std::unique_ptr shared; -}; - -class ContextData -{ -protected: - ContextWeakMutablePtr global_context; - inline static ContextPtr global_context_instance; - ContextSharedPart * shared; - - /// Query metrics for reading data asynchronously with IAsynchronousReader. - mutable std::shared_ptr async_read_counters; - - Settings settings; /// Setting for query execution. - -public: - /// Use copy constructor or createGlobal() instead - ContextData(); - ContextData(const ContextData &); -}; - -class Context : public ContextData, public std::enable_shared_from_this -{ -private: - /// ContextData mutex - mutable SharedMutex mutex; - - Context(); - Context(const Context &); - - std::unique_lock getGlobalLock() const; - - std::shared_lock getGlobalSharedLock() const; - - std::unique_lock getLocalLock() const; - - std::shared_lock getLocalSharedLock() const; - -public: - /// Create initial Context with ContextShared and etc. - static ContextMutablePtr createGlobal(ContextSharedPart * shared_part); - static SharedContextHolder createShared(); - - ContextMutablePtr getGlobalContext() const; - static ContextPtr getGlobalContextInstance() { return global_context_instance; } - - void makeGlobalContext(); - void initGlobal(); - - ~Context(); - - using ConfigurationPtr = Poco::AutoPtr; - - /// Global application configuration settings. - void setConfig(const ConfigurationPtr & config); - const Poco::Util::AbstractConfiguration & getConfigRef() const; - - const Settings & getSettingsRef() const { return settings; } - - String getPath() const; - void setPath(const String & path); - - MultiVersion::Version getMacros() const; - void setMacros(std::unique_ptr && macros); - - BackgroundSchedulePool & getSchedulePool() const; - - /// Storage of allowed hosts from config.xml - void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config); - const RemoteHostFilter & getRemoteHostFilter() const; - - std::shared_ptr getFilesystemCacheLog() const; - std::shared_ptr getFilesystemReadPrefetchesLog() const; - std::shared_ptr getBlobStorageLog() const; - - enum class ApplicationType : uint8_t - { - KEEPER, - SERVER, - }; - - void setApplicationType(ApplicationType) {} - ApplicationType getApplicationType() const { return ApplicationType::KEEPER; } - - IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; -#if USE_LIBURING - IOUringReader & getIOUringReader() const; -#endif - std::shared_ptr getAsyncReadCounters() const; - ThreadPool & getThreadPoolWriter() const; - - ThrottlerPtr getRemoteReadThrottler() const; - ThrottlerPtr getRemoteWriteThrottler() const; - - ThrottlerPtr getLocalReadThrottler() const; - ThrottlerPtr getLocalWriteThrottler() const; - - ReadSettings getReadSettings() const; - - /// Resource management related - ResourceManagerPtr getResourceManager() const; - ClassifierPtr getWorkloadClassifier() const; - - std::shared_ptr getKeeperDispatcher() const; - std::shared_ptr tryGetKeeperDispatcher() const; - void initializeKeeperDispatcher(bool start_async) const; - void shutdownKeeperDispatcher() const; - void updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config); - - zkutil::ZooKeeperPtr getZooKeeper() const; - - const S3SettingsByEndpoint & getStorageS3Settings() const; - - const String & getUserName() const { static std::string user; return user; } - - const ServerSettings & getServerSettings() const; - - bool hasTraceCollector() const; - - bool isBackgroundOperationContext() const; -}; - -} diff --git a/src/Coordination/Standalone/Settings.cpp b/src/Coordination/Standalone/Settings.cpp deleted file mode 100644 index 12a7a42ffac..00000000000 --- a/src/Coordination/Standalone/Settings.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include - -namespace DB -{ - -IMPLEMENT_SETTINGS_TRAITS(SettingsTraits, LIST_OF_SETTINGS) - -std::vector Settings::getAllRegisteredNames() const -{ - std::vector all_settings; - for (const auto & setting_field : all()) - { - all_settings.push_back(setting_field.getName()); - } - return all_settings; -} - -void Settings::set(std::string_view name, const Field & value) -{ - BaseSettings::set(name, value); -} - - -} diff --git a/src/Coordination/Standalone/ThreadStatusExt.cpp b/src/Coordination/Standalone/ThreadStatusExt.cpp deleted file mode 100644 index fc78233d9dc..00000000000 --- a/src/Coordination/Standalone/ThreadStatusExt.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include -#include - -namespace DB -{ - -void CurrentThread::detachFromGroupIfNotDetached() -{ -} - -void CurrentThread::attachToGroup(const ThreadGroupPtr &) -{ -} - -void ThreadStatus::initGlobalProfiler(UInt64 /*global_profiler_real_time_period*/, UInt64 /*global_profiler_cpu_time_period*/) -{ -} - -} diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index a329bec8e2a..b65e9358a72 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index bc72c996384..af681cd5639 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/src/Core/LoadBalancing.h b/src/Core/LoadBalancing.h new file mode 100644 index 00000000000..23758895ce2 --- /dev/null +++ b/src/Core/LoadBalancing.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ + +enum class LoadBalancing : uint8_t +{ + /// among replicas with a minimum number of errors selected randomly + RANDOM = 0, + /// a replica is selected among the replicas with the minimum number of errors + /// with the minimum number of distinguished characters in the replica name prefix and local hostname prefix + NEAREST_HOSTNAME, + /// just like NEAREST_HOSTNAME, but it count distinguished characters in a levenshtein distance manner + HOSTNAME_LEVENSHTEIN_DISTANCE, + // replicas with the same number of errors are accessed in the same order + // as they are specified in the configuration. + IN_ORDER, + /// if first replica one has higher number of errors, + /// pick a random one from replicas with minimum number of errors + FIRST_OR_RANDOM, + // round robin across replicas with the same number of errors. + ROUND_ROBIN, +}; + +} diff --git a/src/Core/PostgreSQL/PoolWithFailover.cpp b/src/Core/PostgreSQL/PoolWithFailover.cpp index a034c50094d..5014564dbe0 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.cpp +++ b/src/Core/PostgreSQL/PoolWithFailover.cpp @@ -27,7 +27,8 @@ PoolWithFailover::PoolWithFailover( size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_, - bool auto_close_connection_) + bool auto_close_connection_, + size_t connection_attempt_timeout_) : pool_wait_timeout(pool_wait_timeout_) , max_tries(max_tries_) , auto_close_connection(auto_close_connection_) @@ -39,8 +40,13 @@ PoolWithFailover::PoolWithFailover( { for (const auto & replica_configuration : configurations) { - auto connection_info = formatConnectionString(replica_configuration.database, - replica_configuration.host, replica_configuration.port, replica_configuration.username, replica_configuration.password); + auto connection_info = formatConnectionString( + replica_configuration.database, + replica_configuration.host, + replica_configuration.port, + replica_configuration.username, + replica_configuration.password, + connection_attempt_timeout_); replicas_with_priority[priority].emplace_back(connection_info, pool_size); } } @@ -51,7 +57,8 @@ PoolWithFailover::PoolWithFailover( size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_, - bool auto_close_connection_) + bool auto_close_connection_, + size_t connection_attempt_timeout_) : pool_wait_timeout(pool_wait_timeout_) , max_tries(max_tries_) , auto_close_connection(auto_close_connection_) @@ -63,7 +70,13 @@ PoolWithFailover::PoolWithFailover( for (const auto & [host, port] : configuration.addresses) { LOG_DEBUG(getLogger("PostgreSQLPoolWithFailover"), "Adding address host: {}, port: {} to connection pool", host, port); - auto connection_string = formatConnectionString(configuration.database, host, port, configuration.username, configuration.password); + auto connection_string = formatConnectionString( + configuration.database, + host, + port, + configuration.username, + configuration.password, + connection_attempt_timeout_); replicas_with_priority[0].emplace_back(connection_string, pool_size); } } diff --git a/src/Core/PostgreSQL/PoolWithFailover.h b/src/Core/PostgreSQL/PoolWithFailover.h index 3c538fc3dea..502a9a9b7d7 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.h +++ b/src/Core/PostgreSQL/PoolWithFailover.h @@ -14,7 +14,6 @@ static constexpr inline auto POSTGRESQL_POOL_DEFAULT_SIZE = 16; static constexpr inline auto POSTGRESQL_POOL_WAIT_TIMEOUT = 5000; -static constexpr inline auto POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES = 2; namespace postgres { @@ -30,14 +29,16 @@ public: size_t pool_size, size_t pool_wait_timeout, size_t max_tries_, - bool auto_close_connection_); + bool auto_close_connection_, + size_t connection_attempt_timeout_); explicit PoolWithFailover( const DB::StoragePostgreSQL::Configuration & configuration, size_t pool_size, size_t pool_wait_timeout, size_t max_tries_, - bool auto_close_connection_); + bool auto_close_connection_, + size_t connection_attempt_timeout_); PoolWithFailover(const PoolWithFailover & other) = delete; diff --git a/src/Core/PostgreSQL/Utils.cpp b/src/Core/PostgreSQL/Utils.cpp index 810bf62fdab..9dc010c1c69 100644 --- a/src/Core/PostgreSQL/Utils.cpp +++ b/src/Core/PostgreSQL/Utils.cpp @@ -8,7 +8,7 @@ namespace postgres { -ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password) +ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password, UInt64 timeout) { DB::WriteBufferFromOwnString out; out << "dbname=" << DB::quote << dbname @@ -16,7 +16,7 @@ ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, S << " port=" << port << " user=" << DB::quote << user << " password=" << DB::quote << password - << " connect_timeout=2"; + << " connect_timeout=" << timeout; return {out.str(), host + ':' + DB::toString(port)}; } diff --git a/src/Core/PostgreSQL/Utils.h b/src/Core/PostgreSQL/Utils.h index f179ab14c89..f2b8f1ac084 100644 --- a/src/Core/PostgreSQL/Utils.h +++ b/src/Core/PostgreSQL/Utils.h @@ -18,7 +18,7 @@ namespace pqxx namespace postgres { -ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password); +ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password, UInt64 timeout); String getConnectionForLog(const String & host, UInt16 port); diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index 4c0848c0706..2e5b91e9b1b 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -63,7 +63,7 @@ const char USER_INTERSERVER_MARKER[] = " INTERSERVER SECRET "; /// Marker for SSH-keys-based authentication (passed as the user name) const char SSH_KEY_AUTHENTICAION_MARKER[] = " SSH KEY AUTHENTICATION "; -/// Market for JSON Web Token authentication +/// Marker for JSON Web Token authentication const char JWT_AUTHENTICAION_MARKER[] = " JWT AUTHENTICATION "; }; diff --git a/src/Core/QueryLogElementType.h b/src/Core/QueryLogElementType.h new file mode 100644 index 00000000000..99361f945cc --- /dev/null +++ b/src/Core/QueryLogElementType.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +namespace DB +{ + +// Make it signed for compatibility with DataTypeEnum8 +enum QueryLogElementType : int8_t +{ + QUERY_START = 1, + QUERY_FINISH = 2, + EXCEPTION_BEFORE_START = 3, + EXCEPTION_WHILE_PROCESSING = 4, +}; + +} diff --git a/src/Core/SchemaInferenceMode.h b/src/Core/SchemaInferenceMode.h new file mode 100644 index 00000000000..17d1d62a60e --- /dev/null +++ b/src/Core/SchemaInferenceMode.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace DB +{ + +enum class SchemaInferenceMode : uint8_t +{ + DEFAULT, + UNION, +}; + +} diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 68ac45fa24f..28b32a6e6a5 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -151,9 +151,11 @@ namespace DB M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ + M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \ M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ - M(Double, gwp_asan_force_sample_probability, 0, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ + M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \ + M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d84e5b149f6..52fa28a4481 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -125,6 +125,9 @@ class IColumn; M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in S3 table engine", 0) \ M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ + M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff between retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff between retries in azure sdk", 0) \ M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, S3::DEFAULT_DISABLE_CHECKSUM, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, S3::DEFAULT_RETRY_ATTEMPTS, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ @@ -164,9 +167,6 @@ class IColumn; M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \ M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \ \ - M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \ - M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \ - \ M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \ M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \ M(Bool, alter_move_to_space_execute_async, false, "Execute ALTER TABLE MOVE ... TO [DISK|VOLUME] asynchronously", 0) \ @@ -202,21 +202,6 @@ class IColumn; M(Bool, group_by_use_nulls, false, "Treat columns mentioned in ROLLUP, CUBE or GROUPING SETS as Nullable", 0) \ \ M(NonZeroUInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled. Should be always greater than 0", 0) \ - M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ - M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ - M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \ - M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \ - M(UInt64, parallel_replicas_custom_key_range_lower, 0, "Lower bound for the universe that the parallel replicas custom range filter is calculated over", 0) \ - M(UInt64, parallel_replicas_custom_key_range_upper, 0, "Upper bound for the universe that the parallel replicas custom range filter is calculated over. A value of 0 disables the upper bound, setting it to the max value of the custom key expression", 0) \ - \ - M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \ - M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \ - M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \ - M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \ - M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \ - M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ - M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ - M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \ \ M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards. Shard is marked as unavailable when: 1) The shard cannot be reached due to a connection failure. 2) Shard is unresolvable through DNS. 3) Table does not exist on the shard.", 0) \ \ @@ -248,8 +233,6 @@ class IColumn; M(Bool, do_not_merge_across_partitions_select_final, false, "Merge parts only in one partition in select final", 0) \ M(Bool, split_parts_ranges_into_intersecting_and_non_intersecting_final, true, "Split parts ranges into intersecting and non intersecting during FINAL optimization", 0) \ M(Bool, split_intersecting_parts_ranges_into_layers_final, true, "Split intersecting parts ranges into layers during FINAL optimization", 0) \ - M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ - M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \ \ M(UInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \ M(Bool, mysql_map_string_to_text_in_show_columns, true, "If enabled, String type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise. Has an effect only when the connection is made through the MySQL wire protocol.", 0) \ @@ -338,7 +321,6 @@ class IColumn; M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ \ M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \ - M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \ \ M(JoinStrictness, join_default_strictness, JoinStrictness::All, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.", 0) \ M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \ @@ -364,6 +346,7 @@ class IColumn; \ M(Bool, ignore_on_cluster_for_replicated_udf_queries, false, "Ignore ON CLUSTER clause for replicated UDF management queries.", 0) \ M(Bool, ignore_on_cluster_for_replicated_access_entities_queries, false, "Ignore ON CLUSTER clause for replicated access entities management queries.", 0) \ + M(Bool, ignore_on_cluster_for_replicated_named_collections_queries, false, "Ignore ON CLUSTER clause for replicated named collections management queries.", 0) \ /** Settings for testing hedged requests */ \ M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \ M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \ @@ -389,7 +372,6 @@ class IColumn; M(Bool, empty_result_for_aggregation_by_constant_keys_on_empty_set, true, "Return empty result when aggregating by constant keys on empty set.", 0) \ M(Bool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to executed distributed DDL queries.", 0) \ M(Bool, allow_suspicious_codecs, false, "If it is set to true, allow to specify meaningless compression codecs.", 0) \ - M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ M(Bool, enable_deflate_qpl_codec, false, "Enable/disable the DEFLATE_QPL codec.", 0) \ M(Bool, enable_zstd_qat_codec, false, "Enable/disable the ZSTD_QAT codec.", 0) \ M(UInt64, query_profiler_real_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ @@ -399,8 +381,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \ - M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ + \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ \ @@ -550,6 +531,7 @@ class IColumn; M(Bool, optimize_read_in_order, true, "Enable ORDER BY optimization for reading data in corresponding order in MergeTree tables.", 0) \ M(Bool, optimize_read_in_window_order, true, "Enable ORDER BY optimization in window clause for reading data in corresponding order in MergeTree tables.", 0) \ M(Bool, optimize_aggregation_in_order, false, "Enable GROUP BY optimization for aggregating data in corresponding order in MergeTree tables.", 0) \ + M(Bool, read_in_order_use_buffering, true, "Use buffering before merging while reading in order of primary key. It increases the parallelism of query execution", 0) \ M(UInt64, aggregation_in_order_max_block_bytes, 50000000, "Maximal size of block in bytes accumulated during aggregation in order of primary key. Lower block size allows to parallelize more final merge stage of aggregation.", 0) \ M(UInt64, read_in_order_two_level_merge_threshold, 100, "Minimal number of parts to read to run preliminary merge step during multithread reading in order of primary key.", 0) \ M(Bool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.", 0) \ @@ -580,7 +562,9 @@ class IColumn; M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ \ M(UInt64, postgresql_connection_pool_size, 16, "Connection pool size for PostgreSQL table engine and database engine.", 0) \ + M(UInt64, postgresql_connection_attempt_timeout, 2, "Connection timeout to PostgreSQL table engine and database engine in seconds.", 0) \ M(UInt64, postgresql_connection_pool_wait_timeout, 5000, "Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool.", 0) \ + M(UInt64, postgresql_connection_pool_retries, 2, "Connection pool push/pop retries number for PostgreSQL table engine and database engine.", 0) \ M(Bool, postgresql_connection_pool_auto_close_connection, false, "Close connection before returning connection to the pool.", 0) \ M(UInt64, glob_expansion_max_elements, 1000, "Maximum number of allowed addresses (For external storages, table functions, etc).", 0) \ M(UInt64, odbc_bridge_connection_pool_size, 16, "Connection pool size for each connection settings string in ODBC bridge.", 0) \ @@ -590,13 +574,6 @@ class IColumn; M(UInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up an incredible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.", 0) \ M(UInt64, distributed_replica_max_ignored_errors, 0, "Number of errors that will be ignored while choosing replicas", 0) \ \ - M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \ - M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \ - M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \ - M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. Not mature enough.", 0) \ - M(Seconds, window_view_clean_interval, 60, "The clean interval of window view in seconds to free outdated data.", 0) \ - M(Seconds, window_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \ - M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \ M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ \ M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \ @@ -612,6 +589,7 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \ + M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ @@ -623,7 +601,7 @@ class IColumn; M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \ - M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ + M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \ M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \ M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \ @@ -635,8 +613,6 @@ class IColumn; M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ - M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ - M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \ M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ @@ -653,8 +629,6 @@ class IColumn; M(Bool, cast_keep_nullable, false, "CAST operator keep Nullable for result data type", 0) \ M(Bool, cast_ipv4_ipv6_default_on_conversion_error, false, "CAST operator into IPv4, CAST operator into IPV6 type, toIPv4, toIPv6 functions will return default value instead of throwing exception on conversion error.", 0) \ M(Bool, alter_partition_verbose_result, false, "Output information about affected parts. Currently works only for FREEZE and ATTACH commands.", 0) \ - M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \ - M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \ M(Bool, system_events_show_zero_values, false, "When querying system.events or system.metrics tables, include all metrics, even with zero values.", 0) \ M(MySQLDataTypesSupport, mysql_datatypes_support_level, MySQLDataTypesSupportList{}, "Defines how MySQL types are converted to corresponding ClickHouse types. A comma separated list in any combination of 'decimal', 'datetime64', 'date2Date32' or 'date2String'. decimal: convert NUMERIC and DECIMAL types to Decimal when precision allows it. datetime64: convert DATETIME and TIMESTAMP types to DateTime64 instead of DateTime when precision is not 0. date2Date32: convert DATE to Date32 instead of Date. Takes precedence over date2String. date2String: convert DATE to String instead of Date. Overridden by datetime64.", 0) \ M(Bool, optimize_trivial_insert_select, false, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \ @@ -707,18 +681,17 @@ class IColumn; M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \ \ M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \ - M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \ M(UInt64, max_size_to_preallocate_for_aggregation, 100'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before aggregation", 0) \ \ + M(Bool, collect_hash_table_stats_during_joins, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \ + M(UInt64, max_size_to_preallocate_for_joins, 100'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before join", 0) \ + \ M(Bool, kafka_disable_num_consumers_limit, false, "Disable limit on kafka_num_consumers that depends on the number of available CPU cores", 0) \ M(Bool, enable_software_prefetch_in_aggregation, true, "Enable use of software prefetch in aggregation", 0) \ M(Bool, allow_aggregate_partitions_independently, false, "Enable independent aggregation of partitions on separate threads when partition key suits group by key. Beneficial when number of partitions close to number of cores and partitions have roughly the same size", 0) \ M(Bool, force_aggregate_partitions_independently, false, "Force the use of optimization when it is applicable, but heuristics decided not to use it", 0) \ M(UInt64, max_number_of_partitions_for_independent_aggregation, 128, "Maximal number of partitions in table to apply optimization", 0) \ M(Float, min_hit_rate_to_use_consecutive_keys_optimization, 0.5, "Minimal hit rate of a cache which is used for consecutive keys optimization in aggregation to keep it enabled", 0) \ - /** Experimental feature for moving data between shards. */ \ - \ - M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \ \ M(Bool, engine_file_empty_if_not_exists, false, "Allows to select data from a file engine table without file", 0) \ M(Bool, engine_file_truncate_on_insert, false, "Enables or disables truncate before insert in file engine tables", 0) \ @@ -732,6 +705,7 @@ class IColumn; M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \ M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \ M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \ + M(Bool, database_replicated_allow_heavy_create, false, "Allow long-running DDL queries (CREATE AS SELECT and POPULATE) in Replicated database engine. Note that it can block DDL queue for a long time.", 0) \ M(Bool, cloud_mode, false, "Only available in ClickHouse Cloud", 0) \ M(UInt64, cloud_mode_engine, 1, "Only available in ClickHouse Cloud", 0) \ M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw', 'none_only_active', 'throw_only_active', 'null_status_on_timeout_only_active'", 0) \ @@ -750,6 +724,7 @@ class IColumn; M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \ M(Bool, optimize_group_by_constant_keys, true, "Optimize GROUP BY when all keys in block are constant", 0) \ M(Bool, legacy_column_name_of_tuple_literal, false, "List all names of element of large tuple literals in their column names instead of hash. This settings exists only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher.", 0) \ + M(Bool, enable_named_columns_in_function_tuple, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers.", 0) \ \ M(Bool, query_plan_enable_optimizations, true, "Globally enable/disable query optimization at the query plan level", 0) \ M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. If limit reached, throw exception", 0) \ @@ -757,6 +732,7 @@ class IColumn; M(Bool, query_plan_push_down_limit, true, "Allow to move LIMITs down in the query plan", 0) \ M(Bool, query_plan_split_filter, true, "Allow to split filters in the query plan", 0) \ M(Bool, query_plan_merge_expressions, true, "Allow to merge expressions in the query plan", 0) \ + M(Bool, query_plan_merge_filters, false, "Allow to merge filters in the query plan", 0) \ M(Bool, query_plan_filter_push_down, true, "Allow to push down filter by predicate query plan step", 0) \ M(Bool, query_plan_convert_outer_join_to_inner_join, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values", 0) \ M(Bool, query_plan_optimize_prewhere, true, "Allow to push down filter to PREWHERE expression for supported storages", 0) \ @@ -898,34 +874,11 @@ class IColumn; M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \ M(Bool, cast_string_to_dynamic_use_inference, false, "Use types inference during String to Dynamic conversion", 0) \ M(Bool, enable_blob_storage_log, true, "Write information about blob storage operations to system.blob_storage_log table", 0) \ - \ - /** Experimental functions */ \ - M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ - M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ - M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ - M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ - M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ - M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \ - M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \ - M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ - M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \ - M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ - M(UInt64, max_threads_for_annoy_index_creation, 4, "Number of threads used to build Annoy indexes (0 means all cores, not recommended)", 0) \ - M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \ - M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \ - M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \ - M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \ - M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \ - M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \ - M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \ - M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ - M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ - M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0) \ M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0) \ M(Bool, print_pretty_type_names, true, "Print pretty type names in DESCRIBE query and toTypeName() function", 0) \ M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \ - M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0)\ + M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0) \ M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \ M(SQLSecurityType, default_materialized_view_sql_security, SQLSecurityType::DEFINER, "Allows to set a default value for SQL SECURITY option when creating a materialized view.", 0) \ M(String, default_view_definer, "CURRENT_USER", "Allows to set a default value for DEFINER option when creating view.", 0) \ @@ -935,6 +888,81 @@ class IColumn; M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \ M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \ M(Bool, allow_deprecated_snowflake_conversion_functions, false, "Enables deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \ + M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \ + M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ + M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ + \ + \ + /* ###################################### */ \ + /* ######## EXPERIMENTAL FEATURES ####### */ \ + /* ###################################### */ \ + M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ + M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ + M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ + M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ + M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ + M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \ + M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \ + M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ + M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \ + M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ + M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ + M(UInt64, max_threads_for_annoy_index_creation, 4, "Number of threads used to build Annoy indexes (0 means all cores, not recommended)", 0) \ + M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \ + M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \ + M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \ + M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \ + M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \ + M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \ + M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ + \ + M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \ + M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \ + \ + /* Parallel replicas */ \ + M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ + M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ + M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \ + M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \ + M(UInt64, parallel_replicas_custom_key_range_lower, 0, "Lower bound for the universe that the parallel replicas custom range filter is calculated over", 0) \ + M(UInt64, parallel_replicas_custom_key_range_upper, 0, "Upper bound for the universe that the parallel replicas custom range filter is calculated over. A value of 0 disables the upper bound, setting it to the max value of the custom key expression", 0) \ + M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \ + M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \ + M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \ + M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \ + M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \ + M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ + M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ + M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \ + \ + M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ + M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \ + \ + M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \ + \ + /* Analyzer: It's not experimental anymore (WIP) */ \ + M(Bool, allow_experimental_analyzer, true, "Allow new query analyzer.", IMPORTANT) \ + M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ + \ + M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \ + M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \ + M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \ + \ + M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. Not mature enough.", 0) \ + M(Seconds, window_view_clean_interval, 60, "The clean interval of window view in seconds to free outdated data.", 0) \ + M(Seconds, window_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \ + M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \ + \ + M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ + M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \ + \ + M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \ + M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \ + \ + /** Experimental feature for moving data between shards. */ \ + M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \ + + /** End of experimental features */ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. @@ -987,6 +1015,7 @@ class IColumn; MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, async_insert_threads, 16) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0) \ + MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_entries_for_hash_table_stats, 10'000) \ /* ---- */ \ MAKE_OBSOLETE(M, DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic) \ MAKE_OBSOLETE(M, UInt64, max_pipeline_depth, 0) \ @@ -1041,6 +1070,7 @@ class IColumn; M(Bool, input_format_orc_allow_missing_columns, true, "Allow missing columns while reading ORC input formats", 0) \ M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \ M(Bool, input_format_orc_filter_push_down, true, "When reading ORC files, skip whole stripes or row groups based on the WHERE/PREWHERE expressions, min/max statistics or bloom filter in the ORC metadata.", 0) \ + M(Bool, input_format_orc_read_use_writer_time_zone, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT.", 0) \ M(Bool, input_format_parquet_allow_missing_columns, true, "Allow missing columns while reading Parquet input formats", 0) \ M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, "Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format", 0) \ M(Bool, input_format_arrow_allow_missing_columns, true, "Allow missing columns while reading Arrow input formats", 0) \ @@ -1108,6 +1138,8 @@ class IColumn; M(Bool, input_format_tsv_crlf_end_of_line, false, "If it is set true, file function will read TSV format with \\r\\n instead of \\n.", 0) \ \ M(Bool, input_format_native_allow_types_conversion, true, "Allow data types conversion in Native input format", 0) \ + M(Bool, input_format_native_decode_types_in_binary_format, false, "Read data types in binary format instead of type names in Native input format", 0) \ + M(Bool, output_format_native_encode_types_in_binary_format, false, "Write data types in binary format instead of type names in Native output format", 0) \ \ M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \ M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \ @@ -1127,6 +1159,8 @@ class IColumn; M(Bool, input_format_avro_null_as_default, false, "For Avro/AvroConfluent format: insert default in case of null and non Nullable column", 0) \ M(UInt64, format_binary_max_string_size, 1_GiB, "The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit", 0) \ M(UInt64, format_binary_max_array_size, 1_GiB, "The maximum allowed size for Array in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit", 0) \ + M(Bool, input_format_binary_decode_types_in_binary_format, false, "Read data types in binary format instead of type names in RowBinaryWithNamesAndTypes input format", 0) \ + M(Bool, output_format_binary_encode_types_in_binary_format, false, "Write data types in binary format instead of type names in RowBinaryWithNamesAndTypes output format ", 0) \ M(URI, format_avro_schema_registry_url, "", "For AvroConfluent format: Confluent Schema Registry URL.", 0) \ \ M(Bool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \ @@ -1147,9 +1181,9 @@ class IColumn; M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \ M(UInt64, output_format_pretty_max_value_width_apply_for_single_value, false, "Only cut values (see the `output_format_pretty_max_value_width` setting) when it is not a single value in a block. Otherwise output it entirely, which is useful for the `SHOW CREATE TABLE` query.", 0) \ M(UInt64Auto, output_format_pretty_color, "auto", "Use ANSI escape sequences in Pretty formats. 0 - disabled, 1 - enabled, 'auto' - enabled if a terminal.", 0) \ - M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \ - M(UInt64, output_format_pretty_display_footer_column_names, true, "Display column names in the footer if there are 999 or more rows.", 0) \ - M(UInt64, output_format_pretty_display_footer_column_names_min_rows, 50, "Sets the minimum threshold value of rows for which to enable displaying column names in the footer. 50 (default)", 0) \ + M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \ + M(UInt64, output_format_pretty_display_footer_column_names, true, "Display column names in the footer if there are 999 or more rows.", 0) \ + M(UInt64, output_format_pretty_display_footer_column_names_min_rows, 50, "Sets the minimum threshold value of rows for which to enable displaying column names in the footer. 50 (default)", 0) \ M(UInt64, output_format_parquet_row_group_size, 1000000, "Target row group size in rows.", 0) \ M(UInt64, output_format_parquet_row_group_size_bytes, 512 * 1024 * 1024, "Target row group size in bytes, before compression.", 0) \ M(Bool, output_format_parquet_string_as_string, true, "Use Parquet String type instead of Binary for String columns.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index b0725340f46..b9b72209103 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -58,7 +58,27 @@ String ClickHouseVersion::toString() const static std::initializer_list> settings_changes_history_initializer = { {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, + {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, + {"output_format_native_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in Native output format"}, + {"input_format_native_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"}, + {"read_in_order_use_buffering", false, true, "Use buffering before merging while reading in order of primary key"}, + {"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"}, + {"enable_named_columns_in_function_tuple", false, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers."}, + {"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."}, {"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."}, + {"collect_hash_table_stats_during_joins", false, true, "New setting."}, + {"max_size_to_preallocate_for_joins", 0, 100'000'000, "New setting."}, + {"input_format_orc_read_use_writer_time_zone", false, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT."}, + {"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."}, + {"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"}, + {"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"}, + {"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"}, + {"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"}, + {"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"}, + {"ignore_on_cluster_for_replicated_named_collections_queries", false, false, "Ignore ON CLUSTER clause for replicated named collections management queries."}, + {"postgresql_connection_attempt_timeout", 2, 2, "Allow to control 'connect_timeout' parameter of PostgreSQL connection."}, + {"postgresql_connection_pool_retries", 2, 2, "Allow to control the number of retries in PostgreSQL connection pool."} }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, @@ -89,7 +109,6 @@ static std::initializer_list +#include #include +#include +#include #include +#include #include #include #include @@ -115,25 +119,6 @@ constexpr auto getEnumValues(); return getEnumValues().size();\ } -enum class LoadBalancing : uint8_t -{ - /// among replicas with a minimum number of errors selected randomly - RANDOM = 0, - /// a replica is selected among the replicas with the minimum number of errors - /// with the minimum number of distinguished characters in the replica name prefix and local hostname prefix - NEAREST_HOSTNAME, - /// just like NEAREST_HOSTNAME, but it count distinguished characters in a levenshtein distance manner - HOSTNAME_LEVENSHTEIN_DISTANCE, - // replicas with the same number of errors are accessed in the same order - // as they are specified in the configuration. - IN_ORDER, - /// if first replica one has higher number of errors, - /// pick a random one from replicas with minimum number of errors - FIRST_OR_RANDOM, - // round robin across replicas with the same number of errors. - ROUND_ROBIN, -}; - DECLARE_SETTING_ENUM(LoadBalancing) DECLARE_SETTING_ENUM(JoinStrictness) @@ -204,16 +189,6 @@ DECLARE_SETTING_ENUM_WITH_RENAME(ParquetVersion, FormatSettings::ParquetVersion) DECLARE_SETTING_ENUM(LogsLevel) - -// Make it signed for compatibility with DataTypeEnum8 -enum QueryLogElementType : int8_t -{ - QUERY_START = 1, - QUERY_FINISH = 2, - EXCEPTION_BEFORE_START = 3, - EXCEPTION_WHILE_PROCESSING = 4, -}; - DECLARE_SETTING_ENUM_WITH_RENAME(LogQueriesType, QueryLogElementType) @@ -292,13 +267,6 @@ enum class StreamingHandleErrorMode : uint8_t DECLARE_SETTING_ENUM(StreamingHandleErrorMode) -enum class ShortCircuitFunctionEvaluation : uint8_t -{ - ENABLE, // Use short-circuit function evaluation for functions that are suitable for it. - FORCE_ENABLE, // Use short-circuit function evaluation for all functions. - DISABLE, // Disable short-circuit function evaluation. -}; - DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation) enum class TransactionsWaitCSNMode : uint8_t @@ -339,6 +307,14 @@ enum class ParallelReplicasCustomKeyFilterType : uint8_t DECLARE_SETTING_ENUM(ParallelReplicasCustomKeyFilterType) +enum class LightweightMutationProjectionMode : uint8_t +{ + THROW, + DROP, +}; + +DECLARE_SETTING_ENUM(LightweightMutationProjectionMode) + DECLARE_SETTING_ENUM(LocalFSReadMethod) enum class ObjectStorageQueueMode : uint8_t @@ -359,12 +335,6 @@ DECLARE_SETTING_ENUM(ObjectStorageQueueAction) DECLARE_SETTING_ENUM(ExternalCommandStderrReaction) -enum class SchemaInferenceMode : uint8_t -{ - DEFAULT, - UNION, -}; - DECLARE_SETTING_ENUM(SchemaInferenceMode) DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOverflowBehavior, FormatSettings::DateTimeOverflowBehavior) diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index caa8b3fdffd..7d094e2a107 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -380,15 +380,6 @@ void SettingFieldString::readBinary(ReadBuffer & in) *this = std::move(str); } -/// Unbeautiful workaround for clickhouse-keeper standalone build ("-DBUILD_STANDALONE_KEEPER=1"). -/// In this build, we don't build and link library dbms (to which SettingsField.cpp belongs) but -/// only build SettingsField.cpp. Further dependencies, e.g. DataTypeString and DataTypeMap below, -/// require building of further files for clickhouse-keeper. To keep dependencies slim, we don't do -/// that. The linker does not complain only because clickhouse-keeper does not call any of below -/// functions. A cleaner alternative would be more modular libraries, e.g. one for data types, which -/// could then be linked by the server and the linker. -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD - SettingFieldMap::SettingFieldMap(const Field & f) : value(fieldToMap(f)) {} String SettingFieldMap::toString() const @@ -428,42 +419,6 @@ void SettingFieldMap::readBinary(ReadBuffer & in) *this = map; } -#else - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -SettingFieldMap::SettingFieldMap(const Field &) : value(Map()) {} -String SettingFieldMap::toString() const -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - - -SettingFieldMap & SettingFieldMap::operator =(const Field &) -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -void SettingFieldMap::parseFromString(const String &) -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -void SettingFieldMap::writeBinary(WriteBuffer &) const -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -void SettingFieldMap::readBinary(ReadBuffer &) -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -#endif - namespace { char stringToChar(const String & str) diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 19809348921..266141815e3 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -247,12 +247,6 @@ struct SettingFieldString void readBinary(ReadBuffer & in); }; -#ifdef CLICKHOUSE_KEEPER_STANDALONE_BUILD -#define NORETURN [[noreturn]] -#else -#define NORETURN -#endif - struct SettingFieldMap { public: @@ -269,11 +263,11 @@ public: operator const Map &() const { return value; } /// NOLINT explicit operator Field() const { return value; } - NORETURN String toString() const; - NORETURN void parseFromString(const String & str); + String toString() const; + void parseFromString(const String & str); - NORETURN void writeBinary(WriteBuffer & out) const; - NORETURN void readBinary(ReadBuffer & in); + void writeBinary(WriteBuffer & out) const; + void readBinary(ReadBuffer & in); }; #undef NORETURN diff --git a/src/Core/ShortCircuitFunctionEvaluation.h b/src/Core/ShortCircuitFunctionEvaluation.h new file mode 100644 index 00000000000..3785efe8015 --- /dev/null +++ b/src/Core/ShortCircuitFunctionEvaluation.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace DB +{ + +enum class ShortCircuitFunctionEvaluation : uint8_t +{ + ENABLE, // Use short-circuit function evaluation for functions that are suitable for it. + FORCE_ENABLE, // Use short-circuit function evaluation for all functions. + DISABLE, // Disable short-circuit function evaluation. +}; + +} diff --git a/src/Core/SortDescription.h b/src/Core/SortDescription.h index 33fd6017599..5c6f3e3150a 100644 --- a/src/Core/SortDescription.h +++ b/src/Core/SortDescription.h @@ -1,15 +1,16 @@ #pragma once -#include -#include -#include -#include + #include -#include #include #include #include +#include +#include +#include +#include + namespace DB { diff --git a/src/Core/tests/gtest_fields_binary_enciding.cpp b/src/Core/tests/gtest_fields_binary_enciding.cpp new file mode 100644 index 00000000000..087caf746bb --- /dev/null +++ b/src/Core/tests/gtest_fields_binary_enciding.cpp @@ -0,0 +1,65 @@ +#include +#include +#include +#include + +using namespace DB; + +namespace DB::ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; +} + + +void check(const Field & field) +{ +// std::cerr << "Check " << toString(field) << "\n"; + WriteBufferFromOwnString ostr; + encodeField(field, ostr); + ReadBufferFromString istr(ostr.str()); + Field decoded_field = decodeField(istr); + ASSERT_TRUE(istr.eof()); + ASSERT_EQ(field, decoded_field); +} + +GTEST_TEST(FieldBinaryEncoding, EncodeAndDecode) +{ + check(Null()); + check(POSITIVE_INFINITY); + check(NEGATIVE_INFINITY); + check(true); + check(UInt64(42)); + check(Int64(-42)); + check(UInt128(42)); + check(Int128(-42)); + check(UInt256(42)); + check(Int256(-42)); + check(UUID(42)); + check(IPv4(42)); + check(IPv6(42)); + check(Float64(42.42)); + check(String("Hello, World!")); + check(Array({Field(UInt64(42)), Field(UInt64(43))})); + check(Tuple({Field(UInt64(42)), Field(Null()), Field(UUID(42)), Field(String("Hello, World!"))})); + check(Map({Tuple{Field(UInt64(42)), Field(String("str_42"))}, Tuple{Field(UInt64(43)), Field(String("str_43"))}})); + check(Object({{String("key_1"), Field(UInt64(42))}, {String("key_2"), Field(UInt64(43))}})); + check(DecimalField(4242, 3)); + check(DecimalField(4242, 3)); + check(DecimalField(Int128(4242), 3)); + check(DecimalField(Int256(4242), 3)); + check(AggregateFunctionStateData{.name="some_name", .data="some_data"}); + try + { + check(CustomType()); + } + catch (const Exception & e) + { + ASSERT_EQ(e.code(), ErrorCodes::UNSUPPORTED_METHOD); + } + + check(Array({ + Tuple({Field(UInt64(42)), Map({Tuple{Field(UInt64(42)), Field(String("str_42"))}, Tuple{Field(UInt64(43)), Field(String("str_43"))}}), Field(UUID(42)), Field(String("Hello, World!"))}), + Tuple({Field(UInt64(43)), Map({Tuple{Field(UInt64(43)), Field(String("str_43"))}, Tuple{Field(UInt64(44)), Field(String("str_44"))}}), Field(UUID(43)), Field(String("Hello, World 2!"))}) + })); +} + diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index b2c425ceb79..9b132c3e199 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -502,9 +503,7 @@ private: if (collectCrashLog) collectCrashLog(sig, thread_num, query_id, stack_trace); -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD Context::getGlobalContextInstance()->handleCrash(); -#endif /// Send crash report to developers (if configured) if (sig != SanitizerTrap) @@ -533,8 +532,6 @@ private: } } - /// ClickHouse Keeper does not link to some parts of Settings. -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// List changed settings. if (!query_id.empty()) { @@ -549,7 +546,6 @@ private: LOG_FATAL(log, "Changed settings: {}", changed_settings); } } -#endif /// When everything is done, we will try to send these error messages to the client. if (thread_ptr) diff --git a/src/Daemon/SentryWriter.cpp b/src/Daemon/SentryWriter.cpp index 9479dd65730..c51a1100639 100644 --- a/src/Daemon/SentryWriter.cpp +++ b/src/Daemon/SentryWriter.cpp @@ -19,7 +19,7 @@ #include "config.h" #include -#if USE_SENTRY && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_SENTRY # include # include diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index 8b4b3d6ee4c..52ed151107e 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -25,7 +25,6 @@ private: mutable std::optional version; String getNameImpl(bool with_version) const; - size_t getVersion() const; public: static constexpr bool is_parametric = true; @@ -39,6 +38,8 @@ public: { } + size_t getVersion() const; + String getFunctionName() const; AggregateFunctionPtr getFunction() const { return function; } diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index cae9622bcb9..d3b3adc4965 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -165,6 +165,19 @@ static std::pair create(const ASTPtr & argum return std::make_pair(storage_type, std::make_unique(std::move(custom_name), nullptr)); } +String DataTypeCustomSimpleAggregateFunction::getFunctionName() const +{ + return function->getName(); +} + +DataTypePtr createSimpleAggregateFunctionType(const AggregateFunctionPtr & function, const DataTypes & argument_types, const Array & parameters) +{ + auto custom_desc = std::make_unique( + std::make_unique(function, argument_types, parameters)); + + return DataTypeFactory::instance().getCustom(std::move(custom_desc)); +} + void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory) { factory.registerDataTypeCustom("SimpleAggregateFunction", create); diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h index bdabb465fe5..303da86979a 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h @@ -40,8 +40,13 @@ public: : function(function_), argument_types(argument_types_), parameters(parameters_) {} AggregateFunctionPtr getFunction() const { return function; } + String getFunctionName() const; + const DataTypes & getArgumentsDataTypes() const { return argument_types; } + const Array & getParameters() const { return parameters; } String getName() const override; static void checkSupportedFunctions(const AggregateFunctionPtr & function); }; +DataTypePtr createSimpleAggregateFunctionType(const AggregateFunctionPtr & function, const DataTypes & argument_types, const Array & parameters); + } diff --git a/src/DataTypes/DataTypeDecimalBase.cpp b/src/DataTypes/DataTypeDecimalBase.cpp index 62218694924..2adfda5bb27 100644 --- a/src/DataTypes/DataTypeDecimalBase.cpp +++ b/src/DataTypes/DataTypeDecimalBase.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index c920e69c13b..a1b1f8325f0 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -2,9 +2,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -69,7 +71,7 @@ static DataTypePtr create(const ASTPtr & arguments) auto * literal = argument->arguments->children[1]->as(); - if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() == 0 || literal->value.get() > 255) + if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() == 0 || literal->value.get() > ColumnVariant::MAX_NESTED_COLUMNS) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 1 and 255"); return std::make_shared(literal->value.get()); @@ -110,28 +112,58 @@ std::unique_ptr DataTypeDynamic::getDynamicSubcolumnDa } /// Extract nested subcolumn of requested dynamic subcolumn if needed. - if (!subcolumn_nested_name.empty()) + /// If requested subcolumn is null map, it's processed separately as there is no Nullable type yet. + bool is_null_map_subcolumn = subcolumn_nested_name == "null"; + if (is_null_map_subcolumn) + { + res->type = std::make_shared(); + } + else if (!subcolumn_nested_name.empty()) { res = getSubcolumnData(subcolumn_nested_name, *res, throw_if_null); if (!res) return nullptr; } - res->serialization = std::make_shared(res->serialization, subcolumn_type->getName()); - res->type = makeNullableOrLowCardinalityNullableSafe(res->type); + res->serialization = std::make_shared(res->serialization, subcolumn_type->getName(), is_null_map_subcolumn); + /// Make resulting subcolumn Nullable only if type subcolumn can be inside Nullable or can be LowCardinality(Nullable()). + bool make_subcolumn_nullable = subcolumn_type->canBeInsideNullable() || subcolumn_type->lowCardinality(); + if (!is_null_map_subcolumn && make_subcolumn_nullable) + res->type = makeNullableOrLowCardinalityNullableSafe(res->type); + if (data.column) { if (discriminator) { - /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator to + /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator/VariantNullMapSubcolumnCreator to /// create full subcolumn from variant according to discriminators. const auto & variant_column = assert_cast(*data.column).getVariantColumn(); - auto creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), "", *discriminator, variant_column.localDiscriminatorByGlobal(*discriminator)); - res->column = creator.create(res->column); + std::unique_ptr creator; + if (is_null_map_subcolumn) + creator = std::make_unique( + variant_column.getLocalDiscriminatorsPtr(), + "", + *discriminator, + variant_column.localDiscriminatorByGlobal(*discriminator)); + else + creator = std::make_unique( + variant_column.getLocalDiscriminatorsPtr(), + "", + *discriminator, + variant_column.localDiscriminatorByGlobal(*discriminator), + make_subcolumn_nullable); + res->column = creator->create(res->column); + } + /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values. + else if (is_null_map_subcolumn) + { + /// Fill null map with 1 when there is no such Dynamic subcolumn. + auto column = ColumnUInt8::create(); + assert_cast(*column).getData().resize_fill(data.column->size(), 1); + res->column = std::move(column); } else { - /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values. auto column = res->type->createColumn(); column->insertManyDefaults(data.column->size()); res->column = std::move(column); diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 8c8f9999ada..07dc4395bfe 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/src/DataTypes/DataTypeNested.h b/src/DataTypes/DataTypeNested.h index 1ad06477a6e..102e6c293cc 100644 --- a/src/DataTypes/DataTypeNested.h +++ b/src/DataTypes/DataTypeNested.h @@ -19,6 +19,8 @@ public: } String getName() const override; + const DataTypes & getElements() const { return elems; } + const Names & getNames() const { return names; } }; DataTypePtr createNested(const DataTypes & types, const Names & names); diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index a4c8ed1a241..e96937d522d 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH; - extern const int ILLEGAL_INDEX; + extern const int ARGUMENT_OUT_OF_BOUND; extern const int LOGICAL_ERROR; } @@ -286,7 +286,7 @@ std::optional DataTypeTuple::tryGetPositionByName(const String & name) c String DataTypeTuple::getNameByPosition(size_t i) const { if (i == 0 || i > names.size()) - throw Exception(ErrorCodes::ILLEGAL_INDEX, "Index of tuple element ({}) if out range ([1, {}])", i, names.size()); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index of tuple element ({}) is out range ([1, {}])", i, names.size()); return names[i - 1]; } diff --git a/src/DataTypes/DataTypesBinaryEncoding.cpp b/src/DataTypes/DataTypesBinaryEncoding.cpp new file mode 100644 index 00000000000..bd994e313ba --- /dev/null +++ b/src/DataTypes/DataTypesBinaryEncoding.cpp @@ -0,0 +1,705 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int INCORRECT_DATA; +} + +namespace +{ + +enum class BinaryTypeIndex : uint8_t +{ + Nothing = 0x00, + UInt8 = 0x01, + UInt16 = 0x02, + UInt32 = 0x03, + UInt64 = 0x04, + UInt128 = 0x05, + UInt256 = 0x06, + Int8 = 0x07, + Int16 = 0x08, + Int32 = 0x09, + Int64 = 0x0A, + Int128 = 0x0B, + Int256 = 0x0C, + Float32 = 0x0D, + Float64 = 0x0E, + Date = 0x0F, + Date32 = 0x10, + DateTimeUTC = 0x11, + DateTimeWithTimezone = 0x12, + DateTime64UTC = 0x13, + DateTime64WithTimezone = 0x14, + String = 0x15, + FixedString = 0x16, + Enum8 = 0x17, + Enum16 = 0x18, + Decimal32 = 0x19, + Decimal64 = 0x1A, + Decimal128 = 0x1B, + Decimal256 = 0x1C, + UUID = 0x1D, + Array = 0x1E, + UnnamedTuple = 0x1F, + NamedTuple = 0x20, + Set = 0x21, + Interval = 0x22, + Nullable = 0x23, + Function = 0x24, + AggregateFunction = 0x25, + LowCardinality = 0x26, + Map = 0x27, + IPv4 = 0x28, + IPv6 = 0x29, + Variant = 0x2A, + Dynamic = 0x2B, + Custom = 0x2C, + Bool = 0x2D, + SimpleAggregateFunction = 0x2E, + Nested = 0x2F, +}; + +BinaryTypeIndex getBinaryTypeIndex(const DataTypePtr & type) +{ + /// By default custom types don't have their own BinaryTypeIndex. + if (type->hasCustomName()) + { + /// Some widely used custom types have separate BinaryTypeIndex for better serialization. + /// Right now it's Bool, SimpleAggregateFunction and Nested types. + /// TODO: Consider adding BinaryTypeIndex for more custom types. + + if (isBool(type)) + return BinaryTypeIndex::Bool; + + if (typeid_cast(type->getCustomName())) + return BinaryTypeIndex::SimpleAggregateFunction; + + if (isNested(type)) + return BinaryTypeIndex::Nested; + + return BinaryTypeIndex::Custom; + } + + switch (type->getTypeId()) + { + case TypeIndex::Nothing: + return BinaryTypeIndex::Nothing; + case TypeIndex::UInt8: + return BinaryTypeIndex::UInt8; + case TypeIndex::UInt16: + return BinaryTypeIndex::UInt16; + case TypeIndex::UInt32: + return BinaryTypeIndex::UInt32; + case TypeIndex::UInt64: + return BinaryTypeIndex::UInt64; + case TypeIndex::UInt128: + return BinaryTypeIndex::UInt128; + case TypeIndex::UInt256: + return BinaryTypeIndex::UInt256; + case TypeIndex::Int8: + return BinaryTypeIndex::Int8; + case TypeIndex::Int16: + return BinaryTypeIndex::Int16; + case TypeIndex::Int32: + return BinaryTypeIndex::Int32; + case TypeIndex::Int64: + return BinaryTypeIndex::Int64; + case TypeIndex::Int128: + return BinaryTypeIndex::Int128; + case TypeIndex::Int256: + return BinaryTypeIndex::Int256; + case TypeIndex::Float32: + return BinaryTypeIndex::Float32; + case TypeIndex::Float64: + return BinaryTypeIndex::Float64; + case TypeIndex::Date: + return BinaryTypeIndex::Date; + case TypeIndex::Date32: + return BinaryTypeIndex::Date32; + case TypeIndex::DateTime: + if (assert_cast(*type).hasExplicitTimeZone()) + return BinaryTypeIndex::DateTimeWithTimezone; + return BinaryTypeIndex::DateTimeUTC; + case TypeIndex::DateTime64: + if (assert_cast(*type).hasExplicitTimeZone()) + return BinaryTypeIndex::DateTime64WithTimezone; + return BinaryTypeIndex::DateTime64UTC; + case TypeIndex::String: + return BinaryTypeIndex::String; + case TypeIndex::FixedString: + return BinaryTypeIndex::FixedString; + case TypeIndex::Enum8: + return BinaryTypeIndex::Enum8; + case TypeIndex::Enum16: + return BinaryTypeIndex::Enum16; + case TypeIndex::Decimal32: + return BinaryTypeIndex::Decimal32; + case TypeIndex::Decimal64: + return BinaryTypeIndex::Decimal64; + case TypeIndex::Decimal128: + return BinaryTypeIndex::Decimal128; + case TypeIndex::Decimal256: + return BinaryTypeIndex::Decimal256; + case TypeIndex::UUID: + return BinaryTypeIndex::UUID; + case TypeIndex::Array: + return BinaryTypeIndex::Array; + case TypeIndex::Tuple: + { + const auto & tuple_type = assert_cast(*type); + if (tuple_type.haveExplicitNames()) + return BinaryTypeIndex::NamedTuple; + return BinaryTypeIndex::UnnamedTuple; + } + case TypeIndex::Set: + return BinaryTypeIndex::Set; + case TypeIndex::Interval: + return BinaryTypeIndex::Interval; + case TypeIndex::Nullable: + return BinaryTypeIndex::Nullable; + case TypeIndex::Function: + return BinaryTypeIndex::Function; + case TypeIndex::AggregateFunction: + return BinaryTypeIndex::AggregateFunction; + case TypeIndex::LowCardinality: + return BinaryTypeIndex::LowCardinality; + case TypeIndex::Map: + return BinaryTypeIndex::Map; + case TypeIndex::Object: + /// Object type will be deprecated and replaced by new implementation. No need to support it here. + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Binary encoding of type Object is not supported"); + case TypeIndex::IPv4: + return BinaryTypeIndex::IPv4; + case TypeIndex::IPv6: + return BinaryTypeIndex::IPv6; + case TypeIndex::Variant: + return BinaryTypeIndex::Variant; + case TypeIndex::Dynamic: + return BinaryTypeIndex::Dynamic; + /// JSONPaths is used only during schema inference and cannot be used anywhere else. + case TypeIndex::JSONPaths: + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Binary encoding of type JSONPaths is not supported"); + } +} + +template +void encodeEnumValues(const DataTypePtr & type, WriteBuffer & buf) +{ + const auto & enum_type = assert_cast &>(*type); + const auto & values = enum_type.getValues(); + writeVarUInt(values.size(), buf); + for (const auto & [name, value] : values) + { + writeStringBinary(name, buf); + writeBinaryLittleEndian(value, buf); + } +} + +template +DataTypePtr decodeEnum(ReadBuffer & buf) +{ + typename DataTypeEnum::Values values; + size_t size; + readVarUInt(size, buf); + for (size_t i = 0; i != size; ++i) + { + String name; + readStringBinary(name, buf); + T value; + readBinaryLittleEndian(value, buf); + values.emplace_back(name, value); + } + + return std::make_shared>(values); +} + +template +void encodeDecimal(const DataTypePtr & type, WriteBuffer & buf) +{ + const auto & decimal_type = assert_cast &>(*type); + /// Both precision and scale should be less than 76, so we can decode it in 1 byte. + writeBinary(UInt8(decimal_type.getPrecision()), buf); + writeBinary(UInt8(decimal_type.getScale()), buf); +} + +template +DataTypePtr decodeDecimal(ReadBuffer & buf) +{ + UInt8 precision; + readBinary(precision, buf); + UInt8 scale; + readBinary(scale, buf); + return std::make_shared>(precision, scale); +} + +void encodeAggregateFunction(const String & function_name, const Array & parameters, const DataTypes & arguments_types, WriteBuffer & buf) +{ + writeStringBinary(function_name, buf); + writeVarUInt(parameters.size(), buf); + for (const auto & param : parameters) + encodeField(param, buf); + writeVarUInt(arguments_types.size(), buf); + for (const auto & argument_type : arguments_types) + encodeDataType(argument_type, buf); +} + +std::tuple decodeAggregateFunction(ReadBuffer & buf) +{ + String function_name; + readStringBinary(function_name, buf); + size_t num_parameters; + readVarUInt(num_parameters, buf); + Array parameters; + parameters.reserve(num_parameters); + for (size_t i = 0; i != num_parameters; ++i) + parameters.push_back(decodeField(buf)); + size_t num_arguments; + readVarUInt(num_arguments, buf); + DataTypes arguments_types; + arguments_types.reserve(num_arguments); + for (size_t i = 0; i != num_arguments; ++i) + arguments_types.push_back(decodeDataType(buf)); + AggregateFunctionProperties properties; + auto action = NullsAction::EMPTY; + auto function = AggregateFunctionFactory::instance().get(function_name, action, arguments_types, parameters, properties); + return {function, parameters, arguments_types}; +} + +} + +void encodeDataType(const DataTypePtr & type, WriteBuffer & buf) +{ + /// First, write the BinaryTypeIndex byte. + auto binary_type_index = getBinaryTypeIndex(type); + buf.write(UInt8(binary_type_index)); + /// Then, write additional information depending on the data type. + switch (binary_type_index) + { + case BinaryTypeIndex::DateTimeWithTimezone: + { + const auto & datetime_type = assert_cast(*type); + writeStringBinary(datetime_type.getTimeZone().getTimeZone(), buf); + break; + } + case BinaryTypeIndex::DateTime64UTC: + { + const auto & datetime64_type = assert_cast(*type); + /// Maximum scale for DateTime64 is 9, so we can write it as 1 byte. + buf.write(UInt8(datetime64_type.getScale())); + break; + } + case BinaryTypeIndex::DateTime64WithTimezone: + { + const auto & datetime64_type = assert_cast(*type); + buf.write(UInt8(datetime64_type.getScale())); + writeStringBinary(datetime64_type.getTimeZone().getTimeZone(), buf); + break; + } + case BinaryTypeIndex::FixedString: + { + const auto & fixed_string_type = assert_cast(*type); + writeVarUInt(fixed_string_type.getN(), buf); + break; + } + case BinaryTypeIndex::Enum8: + { + encodeEnumValues(type, buf); + break; + } + case BinaryTypeIndex::Enum16: + { + encodeEnumValues(type, buf); + break; + } + case BinaryTypeIndex::Decimal32: + { + encodeDecimal(type, buf); + break; + } + case BinaryTypeIndex::Decimal64: + { + encodeDecimal(type, buf); + break; + } + case BinaryTypeIndex::Decimal128: + { + encodeDecimal(type, buf); + break; + } + case BinaryTypeIndex::Decimal256: + { + encodeDecimal(type, buf); + break; + } + case BinaryTypeIndex::Array: + { + const auto & array_type = assert_cast(*type); + encodeDataType(array_type.getNestedType(), buf); + break; + } + case BinaryTypeIndex::NamedTuple: + { + const auto & tuple_type = assert_cast(*type); + const auto & types = tuple_type.getElements(); + const auto & names = tuple_type.getElementNames(); + writeVarUInt(types.size(), buf); + for (size_t i = 0; i != types.size(); ++i) + { + writeStringBinary(names[i], buf); + encodeDataType(types[i], buf); + } + break; + } + case BinaryTypeIndex::UnnamedTuple: + { + const auto & tuple_type = assert_cast(*type); + const auto & element_types = tuple_type.getElements(); + writeVarUInt(element_types.size(), buf); + for (const auto & element_type : element_types) + encodeDataType(element_type, buf); + break; + } + case BinaryTypeIndex::Interval: + { + const auto & interval_type = assert_cast(*type); + writeBinary(UInt8(interval_type.getKind().kind), buf); + break; + } + case BinaryTypeIndex::Nullable: + { + const auto & nullable_type = assert_cast(*type); + encodeDataType(nullable_type.getNestedType(), buf); + break; + } + case BinaryTypeIndex::Function: + { + const auto & function_type = assert_cast(*type); + const auto & arguments_types = function_type.getArgumentTypes(); + const auto & return_type = function_type.getReturnType(); + writeVarUInt(arguments_types.size(), buf); + for (const auto & argument_type : arguments_types) + encodeDataType(argument_type, buf); + encodeDataType(return_type, buf); + break; + } + case BinaryTypeIndex::LowCardinality: + { + const auto & low_cardinality_type = assert_cast(*type); + encodeDataType(low_cardinality_type.getDictionaryType(), buf); + break; + } + case BinaryTypeIndex::Map: + { + const auto & map_type = assert_cast(*type); + encodeDataType(map_type.getKeyType(), buf); + encodeDataType(map_type.getValueType(), buf); + break; + } + case BinaryTypeIndex::Variant: + { + const auto & variant_type = assert_cast(*type); + const auto & variants = variant_type.getVariants(); + writeVarUInt(variants.size(), buf); + for (const auto & variant : variants) + encodeDataType(variant, buf); + break; + } + case BinaryTypeIndex::Dynamic: + { + const auto & dynamic_type = assert_cast(*type); + /// Maximum number of dynamic types is 255, we can write it as 1 byte. + writeBinary(UInt8(dynamic_type.getMaxDynamicTypes()), buf); + break; + } + case BinaryTypeIndex::AggregateFunction: + { + const auto & aggregate_function_type = assert_cast(*type); + writeVarUInt(aggregate_function_type.getVersion(), buf); + encodeAggregateFunction(aggregate_function_type.getFunctionName(), aggregate_function_type.getParameters(), aggregate_function_type.getArgumentsDataTypes(), buf); + break; + } + case BinaryTypeIndex::SimpleAggregateFunction: + { + const auto & simple_aggregate_function_type = assert_cast(*type->getCustomName()); + encodeAggregateFunction(simple_aggregate_function_type.getFunctionName(), simple_aggregate_function_type.getParameters(), simple_aggregate_function_type.getArgumentsDataTypes(), buf); + break; + } + case BinaryTypeIndex::Nested: + { + const auto & nested_type = assert_cast(*type->getCustomName()); + const auto & elements = nested_type.getElements(); + const auto & names = nested_type.getNames(); + writeVarUInt(elements.size(), buf); + for (size_t i = 0; i != elements.size(); ++i) + { + writeStringBinary(names[i], buf); + encodeDataType(elements[i], buf); + } + break; + } + case BinaryTypeIndex::Custom: + { + const auto & type_name = type->getName(); + writeStringBinary(type_name, buf); + break; + } + default: + break; + } +} + +String encodeDataType(const DataTypePtr & type) +{ + WriteBufferFromOwnString buf; + encodeDataType(type, buf); + return buf.str(); +} + +DataTypePtr decodeDataType(ReadBuffer & buf) +{ + UInt8 type; + readBinary(type, buf); + switch (BinaryTypeIndex(type)) + { + case BinaryTypeIndex::Nothing: + return std::make_shared(); + case BinaryTypeIndex::UInt8: + return std::make_shared(); + case BinaryTypeIndex::Bool: + return DataTypeFactory::instance().get("Bool"); + case BinaryTypeIndex::UInt16: + return std::make_shared(); + case BinaryTypeIndex::UInt32: + return std::make_shared(); + case BinaryTypeIndex::UInt64: + return std::make_shared(); + case BinaryTypeIndex::UInt128: + return std::make_shared(); + case BinaryTypeIndex::UInt256: + return std::make_shared(); + case BinaryTypeIndex::Int8: + return std::make_shared(); + case BinaryTypeIndex::Int16: + return std::make_shared(); + case BinaryTypeIndex::Int32: + return std::make_shared(); + case BinaryTypeIndex::Int64: + return std::make_shared(); + case BinaryTypeIndex::Int128: + return std::make_shared(); + case BinaryTypeIndex::Int256: + return std::make_shared(); + case BinaryTypeIndex::Float32: + return std::make_shared(); + case BinaryTypeIndex::Float64: + return std::make_shared(); + case BinaryTypeIndex::Date: + return std::make_shared(); + case BinaryTypeIndex::Date32: + return std::make_shared(); + case BinaryTypeIndex::DateTimeUTC: + return std::make_shared(); + case BinaryTypeIndex::DateTimeWithTimezone: + { + String time_zone; + readStringBinary(time_zone, buf); + return std::make_shared(time_zone); + } + case BinaryTypeIndex::DateTime64UTC: + { + UInt8 scale; + readBinary(scale, buf); + return std::make_shared(scale); + } + case BinaryTypeIndex::DateTime64WithTimezone: + { + UInt8 scale; + readBinary(scale, buf); + String time_zone; + readStringBinary(time_zone, buf); + return std::make_shared(scale, time_zone); + } + case BinaryTypeIndex::String: + return std::make_shared(); + case BinaryTypeIndex::FixedString: + { + UInt64 size; + readVarUInt(size, buf); + return std::make_shared(size); + } + case BinaryTypeIndex::Enum8: + return decodeEnum(buf); + case BinaryTypeIndex::Enum16: + return decodeEnum(buf); + case BinaryTypeIndex::Decimal32: + return decodeDecimal(buf); + case BinaryTypeIndex::Decimal64: + return decodeDecimal(buf); + case BinaryTypeIndex::Decimal128: + return decodeDecimal(buf); + case BinaryTypeIndex::Decimal256: + return decodeDecimal(buf); + case BinaryTypeIndex::UUID: + return std::make_shared(); + case BinaryTypeIndex::Array: + return std::make_shared(decodeDataType(buf)); + case BinaryTypeIndex::NamedTuple: + { + size_t size; + readVarUInt(size, buf); + DataTypes elements; + elements.reserve(size); + Names names; + names.reserve(size); + for (size_t i = 0; i != size; ++i) + { + names.emplace_back(); + readStringBinary(names.back(), buf); + elements.push_back(decodeDataType(buf)); + } + + return std::make_shared(elements, names); + } + case BinaryTypeIndex::UnnamedTuple: + { + size_t size; + readVarUInt(size, buf); + DataTypes elements; + elements.reserve(size); + for (size_t i = 0; i != size; ++i) + elements.push_back(decodeDataType(buf)); + return std::make_shared(elements); + } + case BinaryTypeIndex::Set: + return std::make_shared(); + case BinaryTypeIndex::Interval: + { + UInt8 kind; + readBinary(kind, buf); + return std::make_shared(IntervalKind(IntervalKind::Kind(kind))); + } + case BinaryTypeIndex::Nullable: + return std::make_shared(decodeDataType(buf)); + case BinaryTypeIndex::Function: + { + size_t arguments_size; + readVarUInt(arguments_size, buf); + DataTypes arguments; + arguments.reserve(arguments_size); + for (size_t i = 0; i != arguments_size; ++i) + arguments.push_back(decodeDataType(buf)); + auto return_type = decodeDataType(buf); + return std::make_shared(arguments, return_type); + } + case BinaryTypeIndex::LowCardinality: + return std::make_shared(decodeDataType(buf)); + case BinaryTypeIndex::Map: + { + auto key_type = decodeDataType(buf); + auto value_type = decodeDataType(buf); + return std::make_shared(key_type, value_type); + } + case BinaryTypeIndex::IPv4: + return std::make_shared(); + case BinaryTypeIndex::IPv6: + return std::make_shared(); + case BinaryTypeIndex::Variant: + { + size_t size; + readVarUInt(size, buf); + DataTypes variants; + variants.reserve(size); + for (size_t i = 0; i != size; ++i) + variants.push_back(decodeDataType(buf)); + return std::make_shared(variants); + } + case BinaryTypeIndex::Dynamic: + { + UInt8 max_dynamic_types; + readBinary(max_dynamic_types, buf); + return std::make_shared(max_dynamic_types); + } + case BinaryTypeIndex::AggregateFunction: + { + size_t version; + readVarUInt(version, buf); + const auto & [function, parameters, arguments_types] = decodeAggregateFunction(buf); + return std::make_shared(function, arguments_types, parameters, version); + } + case BinaryTypeIndex::SimpleAggregateFunction: + { + const auto & [function, parameters, arguments_types] = decodeAggregateFunction(buf); + return createSimpleAggregateFunctionType(function, arguments_types, parameters); + } + case BinaryTypeIndex::Nested: + { + size_t size; + readVarUInt(size, buf); + Names names; + names.reserve(size); + DataTypes elements; + elements.reserve(size); + for (size_t i = 0; i != size; ++i) + { + names.emplace_back(); + readStringBinary(names.back(), buf); + elements.push_back(decodeDataType(buf)); + } + + return createNested(elements, names); + } + case BinaryTypeIndex::Custom: + { + String type_name; + readStringBinary(type_name, buf); + return DataTypeFactory::instance().get(type_name); + } + } + + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown type code: {0:#04x}", UInt64(type)); +} + +DataTypePtr decodeDataType(const String & data) +{ + ReadBufferFromString buf(data); + return decodeDataType(buf); +} + +} diff --git a/src/DataTypes/DataTypesBinaryEncoding.h b/src/DataTypes/DataTypesBinaryEncoding.h new file mode 100644 index 00000000000..d02e7f85942 --- /dev/null +++ b/src/DataTypes/DataTypesBinaryEncoding.h @@ -0,0 +1,118 @@ +#pragma once + +#include + +namespace DB +{ + +/** + +Binary encoding for ClickHouse data types: +|------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ClickHouse data type | Binary encoding | +|------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Nothing | 0x00 | +| UInt8 | 0x01 | +| UInt16 | 0x02 | +| UInt32 | 0x03 | +| UInt64 | 0x04 | +| UInt128 | 0x05 | +| UInt256 | 0x06 | +| Int8 | 0x07 | +| Int16 | 0x08 | +| Int32 | 0x09 | +| Int64 | 0x0A | +| Int128 | 0x0B | +| Int256 | 0x0C | +| Float32 | 0x0D | +| Float64 | 0x0E | +| Date | 0x0F | +| Date32 | 0x10 | +| DateTime | 0x11 | +| DateTime(time_zone) | 0x12 | +| DateTime64(P) | 0x13 | +| DateTime64(P, time_zone) | 0x14 | +| String | 0x15 | +| FixedString(N) | 0x16 | +| Enum8 | 0x17... | +| Enum16 | 0x18...> | +| Decimal32(P, S) | 0x19 | +| Decimal64(P, S) | 0x1A | +| Decimal128(P, S) | 0x1B | +| Decimal256(P, S) | 0x1C | +| UUID | 0x1D | +| Array(T) | 0x1E | +| Tuple(T1, ..., TN) | 0x1F... | +| Tuple(name1 T1, ..., nameN TN) | 0x20... | +| Set | 0x21 | +| Interval | 0x22 | +| Nullable(T) | 0x23 | +| Function | 0x24... | +| AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) | 0x25...... | +| LowCardinality(T) | 0x26 | +| Map(K, V) | 0x27 | +| IPv4 | 0x28 | +| IPv6 | 0x29 | +| Variant(T1, ..., TN) | 0x2A... | +| Dynamic(max_types=N) | 0x2B | +| Custom type (Ring, Polygon, etc) | 0x2C | +| Bool | 0x2D | +| SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) | 0x2E...... | +| Nested(name1 T1, ..., nameN TN) | 0x2F... | +|------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| + +Interval kind binary encoding: +|---------------|-----------------| +| Interval kind | Binary encoding | +|---------------|-----------------| +| Nanosecond | 0x00 | +| Microsecond | 0x01 | +| Millisecond | 0x02 | +| Second | 0x03 | +| Minute | 0x04 | +| Hour | 0x05 | +| Day | 0x06 | +| Week | 0x07 | +| Month | 0x08 | +| Quarter | 0x09 | +| Year | 0x1A | +|---------------|-----------------| + +Aggregate function parameter binary encoding (binary encoding of a Field, see src/Common/FieldBinaryEncoding.h): +|------------------------|------------------------------------------------------------------------------------------------------------------------------| +| Parameter type | Binary encoding | +|------------------------|------------------------------------------------------------------------------------------------------------------------------| +| Null | 0x00 | +| UInt64 | 0x01 | +| Int64 | 0x02 | +| UInt128 | 0x03 | +| Int128 | 0x04 | +| UInt128 | 0x05 | +| Int128 | 0x06 | +| Float64 | 0x07 | +| Decimal32 | 0x08 | +| Decimal64 | 0x09 | +| Decimal128 | 0x0A | +| Decimal256 | 0x0B | +| String | 0x0C | +| Array | 0x0D... | +| Tuple | 0x0E... | +| Map | 0x0F... | +| IPv4 | 0x10 | +| IPv6 | 0x11 | +| UUID | 0x12 | +| Bool | 0x13 | +| Object | 0x14... | +| AggregateFunctionState | 0x15 | +| Negative infinity | 0xFE | +| Positive infinity | 0xFF | +|------------------------|------------------------------------------------------------------------------------------------------------------------------| +*/ + +String encodeDataType(const DataTypePtr & type); +void encodeDataType(const DataTypePtr & type, WriteBuffer & buf); + +DataTypePtr decodeDataType(const String & data); +DataTypePtr decodeDataType(ReadBuffer & buf); + +} diff --git a/src/DataTypes/EnumValues.h b/src/DataTypes/EnumValues.h index 889878bc60f..9762ff5870f 100644 --- a/src/DataTypes/EnumValues.h +++ b/src/DataTypes/EnumValues.h @@ -7,7 +7,7 @@ namespace DB { -namespace ErrorCodesEnumValues +namespace ErrorCodes { extern const int BAD_ARGUMENTS; } diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 1c9715bbf53..1cb64b65d3a 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -173,7 +173,7 @@ bool IDataType::hasDynamicSubcolumns() const auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()); auto callback = [&](const SubstreamPath &, const String &, const SubstreamData & subcolumn_data) { - has_dynamic_subcolumns |= subcolumn_data.type->hasDynamicSubcolumnsData(); + has_dynamic_subcolumns |= subcolumn_data.type && subcolumn_data.type->hasDynamicSubcolumnsData(); }; forEachSubcolumn(callback, data); return has_dynamic_subcolumns; diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index bbb1d1a6cd1..7642a6619b3 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -64,6 +64,9 @@ String ISerialization::Substream::toString() const if (type == VariantElement) return fmt::format("VariantElement({})", variant_element_name); + if (type == VariantElementNullMap) + return fmt::format("VariantElementNullMap({}.null)", variant_element_name); + return String(magic_enum::enum_name(type)); } @@ -195,6 +198,8 @@ String getNameForSubstreamPath( stream_name += ".variant_offsets"; else if (it->type == Substream::VariantElement) stream_name += "." + it->variant_element_name; + else if (it->type == Substream::VariantElementNullMap) + stream_name += "." + it->variant_element_name + ".null"; else if (it->type == SubstreamType::DynamicStructure) stream_name += ".dynamic_structure"; } @@ -395,7 +400,8 @@ bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t pref return path[last_elem].type == Substream::NullMap || path[last_elem].type == Substream::TupleElement || path[last_elem].type == Substream::ArraySizes - || path[last_elem].type == Substream::VariantElement; + || path[last_elem].type == Substream::VariantElement + || path[last_elem].type == Substream::VariantElementNullMap; } ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 914ff9cf4a2..255dbbfadd2 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -184,6 +184,7 @@ public: VariantOffsets, VariantElements, VariantElement, + VariantElementNullMap, DynamicData, DynamicStructure, @@ -256,6 +257,11 @@ public: bool position_independent_encoding = true; + /// True if data type names should be serialized in binary encoding. + bool data_types_binary_encoding = false; + + bool use_compact_variant_discriminators_serialization = false; + enum class DynamicStatisticsMode { NONE, /// Don't write statistics. @@ -275,6 +281,9 @@ public: bool position_independent_encoding = true; + /// True if data type names should be deserialized in binary encoding. + bool data_types_binary_encoding = false; + bool native_format = false; /// If not zero, may be used to avoid reallocations while reading column of String type. @@ -434,6 +443,9 @@ protected: template State * checkAndGetState(const StatePtr & state) const; + template + static State * checkAndGetState(const StatePtr & state, const ISerialization * serialization); + [[noreturn]] void throwUnexpectedDataAfterParsedValue(IColumn & column, ReadBuffer & istr, const FormatSettings &, const String & type_name) const; }; @@ -444,10 +456,16 @@ using SubstreamType = ISerialization::Substream::Type; template State * ISerialization::checkAndGetState(const StatePtr & state) const +{ + return checkAndGetState(state, this); +} + +template +State * ISerialization::checkAndGetState(const StatePtr & state, const ISerialization * serialization) { if (!state) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Got empty state for {}", demangle(typeid(*this).name())); + "Got empty state for {}", demangle(typeid(*serialization).name())); auto * state_concrete = typeid_cast(state.get()); if (!state_concrete) @@ -455,7 +473,7 @@ State * ISerialization::checkAndGetState(const StatePtr & state) const auto & state_ref = *state; throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid State for {}. Expected: {}, got {}", - demangle(typeid(*this).name()), + demangle(typeid(*serialization).name()), demangle(typeid(State).name()), demangle(typeid(state_ref).name())); } diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index ac7b8f4d084..b7d43332085 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -42,13 +42,13 @@ void SerializationArray::deserializeBinary(Field & field, ReadBuffer & istr, con { size_t size; readVarUInt(size, istr); - if (settings.max_binary_array_size && size > settings.max_binary_array_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_array_size", size, - settings.max_binary_array_size); + settings.binary.max_binary_string_size); field = Array(); Array & arr = field.get(); @@ -82,13 +82,13 @@ void SerializationArray::deserializeBinary(IColumn & column, ReadBuffer & istr, size_t size; readVarUInt(size, istr); - if (settings.max_binary_array_size && size > settings.max_binary_array_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_array_size", size, - settings.max_binary_array_size); + settings.binary.max_binary_string_size); IColumn & nested_column = column_array.getData(); diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index 6351ff0ca0b..7609ffc91ca 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -109,7 +111,10 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( const auto & variant_column = column_dynamic.getVariantColumn(); /// Write internal Variant type name. - writeStringBinary(dynamic_state->variant_type->getName(), *stream); + if (settings.data_types_binary_encoding) + encodeDataType(dynamic_state->variant_type, *stream); + else + writeStringBinary(dynamic_state->variant_type->getName(), *stream); /// Write statistics in prefix if needed. if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX) @@ -178,9 +183,16 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeD readBinaryLittleEndian(structure_version, *structure_stream); auto structure_state = std::make_shared(structure_version); /// Read internal Variant type name. - String data_type_name; - readStringBinary(data_type_name, *structure_stream); - structure_state->variant_type = DataTypeFactory::instance().get(data_type_name); + if (settings.data_types_binary_encoding) + { + structure_state->variant_type = decodeDataType(*structure_stream); + } + else + { + String data_type_name; + readStringBinary(data_type_name, *structure_stream); + structure_state->variant_type = DataTypeFactory::instance().get(data_type_name); + } const auto * variant_type = typeid_cast(structure_state->variant_type.get()); if (!variant_type) throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type of Dynamic nested column, expected Variant, got {}", structure_state->variant_type->getName()); @@ -280,33 +292,27 @@ void SerializationDynamic::deserializeBinaryBulkWithMultipleStreams( void SerializationDynamic::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - UInt8 null_bit = field.isNull(); - writeBinary(null_bit, ostr); - if (null_bit) + /// Serialize NULL as Nothing type with no value. + if (field.isNull()) + { + encodeDataType(std::make_shared(), ostr); return; + } auto field_type = applyVisitor(FieldToDataType(), field); - auto field_type_name = field_type->getName(); - writeVarUInt(field_type_name.size(), ostr); - writeString(field_type_name, ostr); + encodeDataType(field_type, ostr); field_type->getDefaultSerialization()->serializeBinary(field, ostr, settings); } void SerializationDynamic::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const { - UInt8 null_bit; - readBinary(null_bit, istr); - if (null_bit) + auto field_type = decodeDataType(istr); + if (isNothing(field_type)) { field = Null(); return; } - size_t field_type_name_size; - readVarUInt(field_type_name_size, istr); - String field_type_name(field_type_name_size, 0); - istr.readStrict(field_type_name.data(), field_type_name_size); - auto field_type = DataTypeFactory::instance().get(field_type_name); field_type->getDefaultSerialization()->deserializeBinary(field, istr, settings); } @@ -317,15 +323,15 @@ void SerializationDynamic::serializeBinary(const IColumn & column, size_t row_nu const auto & variant_column = dynamic_column.getVariantColumn(); auto global_discr = variant_column.globalDiscriminatorAt(row_num); - UInt8 null_bit = global_discr == ColumnVariant::NULL_DISCRIMINATOR; - writeBinary(null_bit, ostr); - if (null_bit) + /// Serialize NULL as Nothing type with no value. + if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + encodeDataType(std::make_shared(), ostr); return; + } const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(global_discr); - const auto & variant_type_name = variant_info.variant_names[global_discr]; - writeVarUInt(variant_type_name.size(), ostr); - writeString(variant_type_name, ostr); + encodeDataType(variant_type, ostr); variant_type->getDefaultSerialization()->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings); } @@ -346,30 +352,23 @@ static void deserializeVariant( void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { auto & dynamic_column = assert_cast(column); - UInt8 null_bit; - readBinary(null_bit, istr); - if (null_bit) + auto variant_type = decodeDataType(istr); + if (isNothing(variant_type)) { dynamic_column.insertDefault(); return; } - size_t variant_type_name_size; - readVarUInt(variant_type_name_size, istr); - String variant_type_name(variant_type_name_size, 0); - istr.readStrict(variant_type_name.data(), variant_type_name_size); - + auto variant_type_name = variant_type->getName(); const auto & variant_info = dynamic_column.getVariantInfo(); auto it = variant_info.variant_name_to_discriminator.find(variant_type_name); if (it != variant_info.variant_name_to_discriminator.end()) { - const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(it->second); deserializeVariant(dynamic_column.getVariantColumn(), variant_type, it->second, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); return; } /// We don't have this variant yet. Let's try to add it. - auto variant_type = DataTypeFactory::instance().get(variant_type_name); if (dynamic_column.addNewVariant(variant_type)) { auto discr = variant_info.variant_name_to_discriminator.at(variant_type_name); diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index dafd6d663b0..211f0ac9377 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -77,7 +78,10 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix( if (auto global_discr = assert_cast(*variant_type).tryGetVariantDiscriminator(dynamic_element_name)) { settings.path.push_back(Substream::DynamicData); - dynamic_element_state->variant_serialization = std::make_shared(nested_serialization, dynamic_element_name, *global_discr); + if (is_null_map_subcolumn) + dynamic_element_state->variant_serialization = std::make_shared(dynamic_element_name, *global_discr); + else + dynamic_element_state->variant_serialization = std::make_shared(nested_serialization, dynamic_element_name, *global_discr); dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache); settings.path.pop_back(); } @@ -98,7 +102,16 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( SubstreamsCache * cache) const { if (!state) + { + if (is_null_map_subcolumn) + { + auto mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + data.resize_fill(data.size() + limit, 1); + } + return; + } auto * dynamic_element_state = checkAndGetState(state); @@ -108,6 +121,12 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_element_state->variant_element_state, cache); settings.path.pop_back(); } + else if (is_null_map_subcolumn) + { + auto mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + data.resize_fill(data.size() + limit, 1); + } else { auto mutable_column = result_column->assumeMutable(); diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.h b/src/DataTypes/Serializations/SerializationDynamicElement.h index 2ddc3324139..127d14a55e0 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.h +++ b/src/DataTypes/Serializations/SerializationDynamicElement.h @@ -13,11 +13,11 @@ private: /// To be able to deserialize Dynamic element as a subcolumn /// we need its type name and global discriminator. String dynamic_element_name; + bool is_null_map_subcolumn; public: - SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_) - : SerializationWrapper(nested_) - , dynamic_element_name(dynamic_element_name_) + SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_, bool is_null_map_subcolumn_ = false) + : SerializationWrapper(nested_), dynamic_element_name(dynamic_element_name_), is_null_map_subcolumn(is_null_map_subcolumn_) { } diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 70fe5182ade..0bef3c7d79d 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -55,13 +55,13 @@ void SerializationMap::deserializeBinary(Field & field, ReadBuffer & istr, const { size_t size; readVarUInt(size, istr); - if (settings.max_binary_array_size && size > settings.max_binary_array_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large map size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_array_size", size, - settings.max_binary_array_size); + settings.binary.max_binary_string_size); field = Map(); Map & map = field.get(); map.reserve(size); diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp index 9e39ab23709..9e523d0d745 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -33,13 +33,13 @@ namespace ErrorCodes void SerializationString::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { const String & s = field.get(); - if (settings.max_binary_string_size && s.size() > settings.max_binary_string_size) + if (settings.binary.max_binary_string_size && s.size() > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_string_size", s.size(), - settings.max_binary_string_size); + settings.binary.max_binary_string_size); writeVarUInt(s.size(), ostr); writeString(s, ostr); @@ -50,13 +50,13 @@ void SerializationString::deserializeBinary(Field & field, ReadBuffer & istr, co { UInt64 size; readVarUInt(size, istr); - if (settings.max_binary_string_size && size > settings.max_binary_string_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_string_size", size, - settings.max_binary_string_size); + settings.binary.max_binary_string_size); field = String(); String & s = field.get(); @@ -68,13 +68,13 @@ void SerializationString::deserializeBinary(Field & field, ReadBuffer & istr, co void SerializationString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { const StringRef & s = assert_cast(column).getDataAt(row_num); - if (settings.max_binary_string_size && s.size > settings.max_binary_string_size) + if (settings.binary.max_binary_string_size && s.size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_string_size", s.size, - settings.max_binary_string_size); + settings.binary.max_binary_string_size); writeVarUInt(s.size, ostr); writeString(s, ostr); @@ -89,13 +89,13 @@ void SerializationString::deserializeBinary(IColumn & column, ReadBuffer & istr, UInt64 size; readVarUInt(size, istr); - if (settings.max_binary_string_size && size > settings.max_binary_string_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_string_size", size, - settings.max_binary_string_size); + settings.binary.max_binary_string_size); size_t old_chars_size = data.size(); size_t offset = old_chars_size + size + 1; diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index b386fd8ab45..e4d71e84cc7 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -30,12 +31,18 @@ namespace ErrorCodes struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState { - std::vector states; + explicit SerializeBinaryBulkStateVariant(UInt64 mode) : discriminators_mode(mode) + { + } + + SerializationVariant::DiscriminatorsSerializationMode discriminators_mode; + std::vector variant_states; }; struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState { - std::vector states; + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; + std::vector variant_states; }; void SerializationVariant::enumerateStreams( @@ -65,13 +72,19 @@ void SerializationVariant::enumerateStreams( for (size_t i = 0; i < variants.size(); ++i) { - settings.path.back().creator = std::make_shared(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i); + DataTypePtr type = type_variant ? type_variant->getVariant(i) : nullptr; + settings.path.back().creator = std::make_shared( + local_discriminators, + variant_names[i], + i, + column_variant ? column_variant->localDiscriminatorByGlobal(i) : i, + !type || type->canBeInsideNullable() || type->lowCardinality()); auto variant_data = SubstreamData(variants[i]) - .withType(type_variant ? type_variant->getVariant(i) : nullptr) + .withType(type) .withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr) .withSerializationInfo(data.serialization_info) - .withDeserializeState(variant_deserialize_state ? variant_deserialize_state->states[i] : nullptr); + .withDeserializeState(variant_deserialize_state ? variant_deserialize_state->variant_states[i] : nullptr); addVariantElementToPath(settings.path, i); settings.path.back().data = variant_data; @@ -79,6 +92,24 @@ void SerializationVariant::enumerateStreams( settings.path.pop_back(); } + /// Variant subcolumns like variant.Type have type Nullable(Type), so we want to support reading null map subcolumn from it: variant.Type.null. + /// Nullable column is created during deserialization of a variant subcolumn according to the discriminators, so we don't have actual Nullable + /// serialization with null map subcolumn. To be able to read null map subcolumn from the variant subcolumn we use special serialization + /// SerializationVariantElementNullMap. + auto null_map_data = SubstreamData(std::make_shared>()) + .withType(type_variant ? std::make_shared() : nullptr) + .withColumn(column_variant ? ColumnUInt8::create() : nullptr); + + for (size_t i = 0; i < variants.size(); ++i) + { + settings.path.back().creator = std::make_shared(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i); + settings.path.push_back(Substream::VariantElementNullMap); + settings.path.back().variant_element_name = variant_names[i]; + settings.path.back().data = null_map_data; + callback(settings.path); + settings.path.pop_back(); + } + settings.path.pop_back(); } @@ -87,17 +118,26 @@ void SerializationVariant::serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const { - const ColumnVariant & col = assert_cast(column); + settings.path.push_back(Substream::VariantDiscriminators); + auto * discriminators_stream = settings.getter(settings.path); + settings.path.pop_back(); - auto variant_state = std::make_shared(); - variant_state->states.resize(variants.size()); + if (!discriminators_stream) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for VariantDiscriminators in SerializationVariant::serializeBinaryBulkStatePrefix"); + + UInt64 mode = settings.use_compact_variant_discriminators_serialization ? DiscriminatorsSerializationMode::COMPACT : DiscriminatorsSerializationMode::BASIC; + writeBinaryLittleEndian(mode, *discriminators_stream); + + const ColumnVariant & col = assert_cast(column); + auto variant_state = std::make_shared(mode); + variant_state->variant_states.resize(variants.size()); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->states[i]); + variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->variant_states[i]); settings.path.pop_back(); } @@ -116,7 +156,7 @@ void SerializationVariant::serializeBinaryBulkStateSuffix( for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->states[i]); + variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->variant_states[i]); settings.path.pop_back(); } settings.path.pop_back(); @@ -128,14 +168,19 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { + DeserializeBinaryBulkStatePtr discriminators_state = deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) + return; + auto variant_state = std::make_shared(); - variant_state->states.resize(variants.size()); + variant_state->discriminators_state = discriminators_state; + variant_state->variant_states.resize(variants.size()); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i], cache); + variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->variant_states[i], cache); settings.path.pop_back(); } @@ -143,6 +188,29 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( state = std::move(variant_state); } +ISerialization::DeserializeBinaryBulkStatePtr SerializationVariant::deserializeDiscriminatorsStatePrefix( + DeserializeBinaryBulkSettings & settings, + SubstreamsDeserializeStatesCache * cache) +{ + settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStatePtr discriminators_state = nullptr; + if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path)) + { + discriminators_state = cached_state; + } + else if (auto * discriminators_stream = settings.getter(settings.path)) + { + UInt64 mode; + readBinaryLittleEndian(mode, *discriminators_stream); + discriminators_state = std::make_shared(mode); + addToSubstreamsDeserializeStatesCache(cache, settings.path, discriminators_state); + } + + settings.path.pop_back(); + return discriminators_state; +} + void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics( const IColumn & column, @@ -165,13 +233,71 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian auto * variant_state = checkAndGetState(state); - /// If offset = 0 and limit == col.size() or we have only NULLs, we don't need to calculate + /// Don't write anything if column is empty. + if (limit == 0) + return; + + /// Write number of rows in this granule in compact mode. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + writeVarUInt(UInt64(limit), *discriminators_stream); + + /// If column has only one none empty discriminators and no NULLs we don't need to + /// calculate limits for variants and use provided offset/limit. + if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) + { + auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr); + + /// In compact mode write the format of the granule and single non-empty discriminator. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); + } + /// For basic mode just serialize this discriminator limit times. + else + { + for (size_t i = 0; i < limit; ++i) + writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); + } + + settings.path.push_back(Substream::VariantElements); + addVariantElementToPath(settings.path, non_empty_global_discr); + /// We can use the same offset/limit as for whole Variant column + variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->variant_states[non_empty_global_discr]); + variants_statistics[variant_names[non_empty_global_discr]] += limit; + settings.path.pop_back(); + settings.path.pop_back(); + return; + } + /// If column has only NULLs, just serialize NULL discriminators. + else if (col.hasOnlyNulls()) + { + /// In compact mode write single NULL_DISCRIMINATOR. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + /// In basic mode write NULL_DISCRIMINATOR limit times. + else + { + for (size_t i = 0; i < limit; ++i) + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + return; + } + + /// If offset = 0 and limit == col.size() we don't need to calculate /// offsets and limits for variants and need to just serialize whole columns. - if ((offset == 0 && limit == col.size()) || col.hasOnlyNulls()) + if ((offset == 0 && limit == col.size())) { /// First, serialize discriminators. - /// If we have only NULLs or local and global discriminators are the same, just serialize the column as is. - if (col.hasOnlyNulls() || col.hasGlobalVariantsOrder()) + /// Here we are sure that column contains different discriminators, use plain granule format in compact mode. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream); + + /// If local and global discriminators are the same, just serialize the column as is. + if (col.hasGlobalVariantsOrder()) { SerializationNumber().serializeBinaryBulk(col.getLocalDiscriminatorsColumn(), *discriminators_stream, offset, limit); } @@ -188,7 +314,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian for (size_t i = 0; i != variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]); + variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->variant_states[i]); variants_statistics[variant_names[i]] += col.getVariantByGlobalDiscriminator(i).size(); settings.path.pop_back(); } @@ -196,36 +322,16 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian return; } - /// If we have only one non empty variant and no NULLs, we can use the same limit offset for this variant. - if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) - { - /// First, serialize discriminators. - /// We know that all discriminators are the same, so we just need to serialize this discriminator limit times. - auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr); - for (size_t i = 0; i != limit; ++i) - writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); - - /// Second, serialize non-empty variant (other variants are empty and we can skip their serialization). - settings.path.push_back(Substream::VariantElements); - addVariantElementToPath(settings.path, non_empty_global_discr); - /// We can use the same offset/limit as for whole Variant column - variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]); - variants_statistics[variant_names[non_empty_global_discr]] += limit; - settings.path.pop_back(); - settings.path.pop_back(); - return; - } - /// In general case we should iterate through local discriminators in range [offset, offset + limit] to serialize global discriminators and calculate offset/limit pair for each variant. const auto & local_discriminators = col.getLocalDiscriminators(); const auto & offsets = col.getOffsets(); std::vector> variant_offsets_and_limits(variants.size(), {0, 0}); size_t end = offset + limit; + size_t num_non_empty_variants_in_range = 0; + ColumnVariant::Discriminator last_non_empty_variant_discr = 0; for (size_t i = offset; i < end; ++i) { auto global_discr = col.globalDiscriminatorByLocal(local_discriminators[i]); - writeBinaryLittleEndian(global_discr, *discriminators_stream); - if (global_discr != ColumnVariant::NULL_DISCRIMINATOR) { /// If we see this discriminator for the first time, update offset @@ -233,9 +339,38 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian variant_offsets_and_limits[global_discr].first = offsets[i]; /// Update limit for this discriminator. ++variant_offsets_and_limits[global_discr].second; + ++num_non_empty_variants_in_range; + last_non_empty_variant_discr = global_discr; } } + /// In basic mode just serialize discriminators as is row by row. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::BASIC) + { + for (size_t i = offset; i < end; ++i) + writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream); + } + /// In compact mode check if we have the same discriminator for all rows in this granule. + /// First, check if all values in granule are NULLs. + else if (num_non_empty_variants_in_range == 0) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + /// Then, check if there is only 1 variant and no NULLs in this granule. + else if (num_non_empty_variants_in_range == 1 && variant_offsets_and_limits[last_non_empty_variant_discr].second == limit) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(last_non_empty_variant_discr, *discriminators_stream); + } + /// Otherwise there are different discriminators in this granule. + else + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream); + for (size_t i = offset; i < end; ++i) + writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream); + } + /// Serialize variants in global order. settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i != variants.size(); ++i) @@ -249,7 +384,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian variant_offsets_and_limits[i].first, variant_offsets_and_limits[i].second, settings, - variant_state->states[i]); + variant_state->variant_states[i]); variants_statistics[variant_names[i]] += variant_offsets_and_limits[i].second; settings.path.pop_back(); } @@ -284,39 +419,68 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( /// First, deserialize discriminators. settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariant * variant_state = nullptr; + std::vector variant_limits; if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) { + variant_state = checkAndGetState(state); col.getLocalDiscriminatorsPtr() = cached_discriminators; } - else + else if (auto * discriminators_stream = settings.getter(settings.path)) { - auto * discriminators_stream = settings.getter(settings.path); - if (!discriminators_stream) - return; + variant_state = checkAndGetState(state); + auto * discriminators_state = checkAndGetState(variant_state->discriminators_state); + + /// Deserialize discriminators according to serialization mode. + if (discriminators_state->mode.value == DiscriminatorsSerializationMode::BASIC) + SerializationNumber().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0); + else + variant_limits = deserializeCompactDiscriminators(col.getLocalDiscriminatorsPtr(), limit, discriminators_stream, settings.continuous_reading, *discriminators_state); - SerializationNumber().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0); addToSubstreamsCache(cache, settings.path, col.getLocalDiscriminatorsPtr()); } + /// It may happen that there is no such stream, in this case just do nothing. + else + { + settings.path.pop_back(); + return; + } + settings.path.pop_back(); - /// Second, calculate limits for each variant by iterating through new discriminators. - std::vector variant_limits(variants.size(), 0); - auto & discriminators_data = col.getLocalDiscriminators(); - size_t discriminators_offset = discriminators_data.size() - limit; - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + /// Second, calculate limits for each variant by iterating through new discriminators + /// if we didn't do it during discriminators deserialization. + if (variant_limits.empty()) { - ColumnVariant::Discriminator discr = discriminators_data[i]; - if (discr != ColumnVariant::NULL_DISCRIMINATOR) - ++variant_limits[discr]; + variant_limits.resize(variants.size(), 0); + auto & discriminators_data = col.getLocalDiscriminators(); + + /// We can actually read less than limit discriminators and we cannot determine the actual number of read rows + /// by discriminators column as it could be taken from the substreams cache. And we need actual number of read + /// rows to fill offsets correctly later if they are not in the cache. We can determine if offsets column is in cache + /// or not by comparing it with discriminators column size (they should be the same when offsets are in cache). + /// If offsets are not in the cache, we can use it's size to determine the actual number of read rows. + size_t num_new_discriminators = limit; + size_t offsets_size = col.getOffsetsPtr()->size(); + if (discriminators_data.size() > offsets_size) + num_new_discriminators = discriminators_data.size() - offsets_size; + size_t discriminators_offset = discriminators_data.size() - num_new_discriminators; + + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr != ColumnVariant::NULL_DISCRIMINATOR) + ++variant_limits[discr]; + } } /// Now we can deserialize variants according to their limits. - auto * variant_state = checkAndGetState(state); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i != variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->states[i], cache); + variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->variant_states[i], cache); settings.path.pop_back(); } settings.path.pop_back(); @@ -336,20 +500,49 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( } else { - auto & offsets = col.getOffsets(); - offsets.reserve(offsets.size() + limit); std::vector variant_offsets; variant_offsets.reserve(variants.size()); + size_t num_non_empty_variants = 0; + ColumnVariant::Discriminator last_non_empty_discr = 0; for (size_t i = 0; i != variants.size(); ++i) - variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]); - - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) { - ColumnVariant::Discriminator discr = discriminators_data[i]; - if (discr == ColumnVariant::NULL_DISCRIMINATOR) - offsets.emplace_back(); - else - offsets.push_back(variant_offsets[discr]++); + if (variant_limits[i]) + { + ++num_non_empty_variants; + last_non_empty_discr = i; + } + + variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]); + } + + auto & discriminators_data = col.getLocalDiscriminators(); + auto & offsets = col.getOffsets(); + size_t num_new_offsets = discriminators_data.size() - offsets.size(); + offsets.reserve(offsets.size() + num_new_offsets); + /// If there are only NULLs were read, fill offsets with 0. + if (num_non_empty_variants == 0) + { + offsets.resize_fill(discriminators_data.size(), 0); + } + /// If there is only 1 variant and no NULLs was read, fill offsets with sequential offsets of this variant. + else if (num_non_empty_variants == 1 && variant_limits[last_non_empty_discr] == num_new_offsets) + { + size_t first_offset = col.getVariantByLocalDiscriminator(last_non_empty_discr).size() - num_new_offsets; + for (size_t i = 0; i != num_new_offsets; ++i) + offsets.push_back(first_offset + i); + } + /// Otherwise iterate through discriminators and fill offsets accordingly. + else + { + size_t start = offsets.size(); + for (size_t i = start; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr == ColumnVariant::NULL_DISCRIMINATOR) + offsets.emplace_back(); + else + offsets.push_back(variant_offsets[discr]++); + } } addToSubstreamsCache(cache, settings.path, col.getOffsetsPtr()); @@ -357,6 +550,72 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( settings.path.pop_back(); } +std::vector SerializationVariant::deserializeCompactDiscriminators( + DB::ColumnPtr & discriminators_column, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStateVariantDiscriminators & state) const +{ + auto & discriminators = assert_cast(*discriminators_column->assumeMutable()); + auto & discriminators_data = discriminators.getData(); + + /// Reset state if we are reading from the start of the granule and not from the previous position in the file. + if (!continuous_reading) + state.remaining_rows_in_granule = 0; + + /// Calculate limits for variants during discriminators deserialization. + std::vector variant_limits(variants.size(), 0); + while (limit) + { + /// If we read all rows from current granule, start reading the next one. + if (state.remaining_rows_in_granule == 0) + { + if (stream->eof()) + return variant_limits; + + readDiscriminatorsGranuleStart(state, stream); + } + + size_t limit_in_granule = std::min(limit, state.remaining_rows_in_granule); + if (state.granule_format == CompactDiscriminatorsGranuleFormat::COMPACT) + { + auto & data = discriminators.getData(); + data.resize_fill(data.size() + limit_in_granule, state.compact_discr); + if (state.compact_discr != ColumnVariant::NULL_DISCRIMINATOR) + variant_limits[state.compact_discr] += limit_in_granule; + } + else + { + SerializationNumber().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0); + size_t start = discriminators_data.size() - limit_in_granule; + for (size_t i = start; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr != ColumnVariant::NULL_DISCRIMINATOR) + ++variant_limits[discr]; + } + } + + state.remaining_rows_in_granule -= limit_in_granule; + limit -= limit_in_granule; + } + + return variant_limits; +} + +void SerializationVariant::readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, DB::ReadBuffer * stream) +{ + UInt64 granule_size; + readVarUInt(granule_size, *stream); + state.remaining_rows_in_granule = granule_size; + UInt8 granule_format; + readBinaryLittleEndian(granule_format, *stream); + state.granule_format = static_cast(granule_format); + if (granule_format == CompactDiscriminatorsGranuleFormat::COMPACT) + readBinaryLittleEndian(state.compact_discr, *stream); +} + void SerializationVariant::addVariantElementToPath(DB::ISerialization::SubstreamPath & path, size_t i) const { path.push_back(Substream::VariantElement); diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index b6aa1534538..af89632cf81 100644 --- a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -2,10 +2,18 @@ #include #include +#include namespace DB { + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + + /// Class for serializing/deserializing column with Variant type. /// It supports both text and binary bulk serializations/deserializations. /// @@ -18,6 +26,17 @@ namespace DB /// /// During binary bulk serialization it transforms local discriminators /// to global and serializes them into a separate stream VariantDiscriminators. +/// There are 2 modes of serialising discriminators: +/// Basic mode, when all discriminators are serialized as is row by row. +/// Compact mode, when we avoid writing the same discriminators in granules when there is +/// only one variant (or only NULLs) in the granule. +/// In compact mode we serialize granules in the following format: +/// +/// There are 2 different formats of granule - plain and compact. +/// Plain format is used when there are different discriminators in this granule, +/// in this format all discriminators are serialized as is row by row. +/// Compact format is used when all discriminators are the same in this granule, +/// in this case only this single discriminator is serialized. /// Each variant is serialized into a separate stream with path VariantElements/VariantElement /// (VariantElements stream is needed for correct sub-columns creation). We store and serialize /// variants in a sparse form (the size of a variant column equals to the number of its discriminator @@ -32,6 +51,25 @@ namespace DB class SerializationVariant : public ISerialization { public: + struct DiscriminatorsSerializationMode + { + enum Value + { + BASIC = 0, /// Store the whole discriminators column. + COMPACT = 1, /// Don't write discriminators in granule if all of them are the same. + }; + + static void checkMode(UInt64 mode) + { + if (mode > Value::COMPACT) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for SerializationVariant discriminators column."); + } + + explicit DiscriminatorsSerializationMode(UInt64 mode) : value(static_cast(mode)) { checkMode(mode); } + + Value value; + }; + using VariantSerializations = std::vector; explicit SerializationVariant( @@ -123,8 +161,44 @@ public: static std::vector getVariantsDeserializeTextOrder(const DataTypes & variant_types); private: + friend SerializationVariantElement; + friend SerializationVariantElementNullMap; + void addVariantElementToPath(SubstreamPath & path, size_t i) const; + enum CompactDiscriminatorsGranuleFormat + { + PLAIN = 0, /// Granule has different discriminators and they are serialized as is row by row. + COMPACT = 1, /// Granule has single discriminator for all rows and it is serialized as single value. + }; + + struct DeserializeBinaryBulkStateVariantDiscriminators : public ISerialization::DeserializeBinaryBulkState + { + explicit DeserializeBinaryBulkStateVariantDiscriminators(UInt64 mode_) : mode(mode_) + { + } + + DiscriminatorsSerializationMode mode; + + /// Deserialize state of currently read granule in compact mode. + CompactDiscriminatorsGranuleFormat granule_format = CompactDiscriminatorsGranuleFormat::PLAIN; + size_t remaining_rows_in_granule = 0; + ColumnVariant::Discriminator compact_discr = 0; + }; + + static DeserializeBinaryBulkStatePtr deserializeDiscriminatorsStatePrefix( + DeserializeBinaryBulkSettings & settings, + SubstreamsDeserializeStatesCache * cache); + + std::vector deserializeCompactDiscriminators( + ColumnPtr & discriminators_column, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStateVariantDiscriminators & state) const; + + static void readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, ReadBuffer * stream); + bool tryDeserializeTextEscapedImpl(IColumn & column, const String & field, const FormatSettings & settings) const; bool tryDeserializeTextQuotedImpl(IColumn & column, const String & field, const FormatSettings & settings) const; bool tryDeserializeWholeTextImpl(IColumn & column, const String & field, const FormatSettings & settings) const; diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index ec0b4019c2f..8ceab17cba4 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -12,7 +13,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -struct DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState +struct SerializationVariantElement::DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState { /// During deserialization discriminators and variant streams can be shared. /// For example we can read several variant elements together: "select v.UInt32, v.String from table", @@ -24,7 +25,7 @@ struct DeserializeBinaryBulkStateVariantElement : public ISerialization::Deseria /// substream cache correctly. ColumnPtr discriminators; ColumnPtr variant; - + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; }; @@ -65,7 +66,12 @@ void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinary void SerializationVariantElement::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { + DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) + return; + auto variant_element_state = std::make_shared(); + variant_element_state->discriminators_state = discriminators_state; addVariantToPath(settings.path); nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state, cache); @@ -86,35 +92,61 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const { - auto * variant_element_state = checkAndGetState(state); - /// First, deserialize discriminators from Variant column. settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariantElement * variant_element_state = nullptr; + std::optional variant_limit; if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) { + variant_element_state = checkAndGetState(state); variant_element_state->discriminators = cached_discriminators; } - else + else if (auto * discriminators_stream = settings.getter(settings.path)) { - auto * discriminators_stream = settings.getter(settings.path); - if (!discriminators_stream) - return; + variant_element_state = checkAndGetState(state); + auto * discriminators_state = checkAndGetState(variant_element_state->discriminators_state); /// If we started to read a new column, reinitialize discriminators column in deserialization state. if (!variant_element_state->discriminators || result_column->empty()) variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); - SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); + /// Deserialize discriminators according to serialization mode. + if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC) + SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); + else + variant_limit = deserializeCompactDiscriminators( + variant_element_state->discriminators, + variant_discriminator, + limit, + discriminators_stream, + settings.continuous_reading, + variant_element_state->discriminators_state, + this); + addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators); } + else + { + settings.path.pop_back(); + return; + } + settings.path.pop_back(); - /// Iterate through new discriminators to calculate the limit for our variant. + /// We could read less than limit discriminators, but we will need actual number of read rows later. + size_t num_new_discriminators = variant_element_state->discriminators->size() - result_column->size(); + + /// Iterate through new discriminators to calculate the limit for our variant + /// if we didn't do it during discriminators deserialization. const auto & discriminators_data = assert_cast(*variant_element_state->discriminators).getData(); - size_t discriminators_offset = variant_element_state->discriminators->size() - limit; - size_t variant_limit = 0; - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) - variant_limit += (discriminators_data[i] == variant_discriminator); + size_t discriminators_offset = variant_element_state->discriminators->size() - num_new_discriminators; + if (!variant_limit) + { + variant_limit = 0; + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + *variant_limit += (discriminators_data[i] == variant_discriminator); + } /// Now we know the limit for our variant and can deserialize it. @@ -125,19 +157,19 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( auto & nullable_column = assert_cast(*mutable_column); NullMap & null_map = nullable_column.getNullMapData(); /// If we have only our discriminator in range, fill null map with 0. - if (variant_limit == limit) + if (variant_limit == num_new_discriminators) { - null_map.resize_fill(null_map.size() + limit, 0); + null_map.resize_fill(null_map.size() + num_new_discriminators, 0); } /// If no our discriminator in current range, fill null map with 1. else if (variant_limit == 0) { - null_map.resize_fill(null_map.size() + limit, 1); + null_map.resize_fill(null_map.size() + num_new_discriminators, 1); } /// Otherwise we should iterate through discriminators to fill null map. else { - null_map.reserve(null_map.size() + limit); + null_map.reserve(null_map.size() + num_new_discriminators); for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) null_map.push_back(discriminators_data[i] != variant_discriminator); } @@ -159,12 +191,12 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( /// If nothing to deserialize, just insert defaults. if (variant_limit == 0) { - mutable_column->insertManyDefaults(limit); + mutable_column->insertManyDefaults(num_new_discriminators); return; } addVariantToPath(settings.path); - nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, variant_limit, settings, variant_element_state->variant_element_state, cache); + nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, *variant_limit, settings, variant_element_state->variant_element_state, cache); removeVariantFromPath(settings.path); /// If nothing was deserialized when variant_limit > 0 @@ -173,16 +205,16 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( /// In this case we should just insert default values. if (variant_element_state->variant->empty()) { - mutable_column->insertManyDefaults(limit); + mutable_column->insertManyDefaults(num_new_discriminators); return; } - size_t variant_offset = variant_element_state->variant->size() - variant_limit; + size_t variant_offset = variant_element_state->variant->size() - *variant_limit; /// If we have only our discriminator in range, insert the whole range to result column. - if (variant_limit == limit) + if (variant_limit == num_new_discriminators) { - mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, variant_limit); + mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, *variant_limit); } /// Otherwise iterate through discriminators and insert value from variant or default value depending on the discriminator. else @@ -197,6 +229,59 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( } } +size_t SerializationVariantElement::deserializeCompactDiscriminators( + DB::ColumnPtr & discriminators_column, + ColumnVariant::Discriminator variant_discriminator, + size_t limit, + DB::ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStatePtr & discriminators_state_, + const ISerialization * serialization) +{ + auto * discriminators_state = checkAndGetState(discriminators_state_, serialization); + auto & discriminators = assert_cast(*discriminators_column->assumeMutable()); + auto & discriminators_data = discriminators.getData(); + + /// Reset state if we are reading from the start of the granule and not from the previous position in the file. + if (!continuous_reading) + discriminators_state->remaining_rows_in_granule = 0; + + /// Calculate our variant limit during discriminators deserialization. + size_t variant_limit = 0; + while (limit) + { + /// If we read all rows from current granule, start reading the next one. + if (discriminators_state->remaining_rows_in_granule == 0) + { + if (stream->eof()) + return variant_limit; + + SerializationVariant::readDiscriminatorsGranuleStart(*discriminators_state, stream); + } + + size_t limit_in_granule = std::min(limit, discriminators_state->remaining_rows_in_granule); + if (discriminators_state->granule_format == SerializationVariant::CompactDiscriminatorsGranuleFormat::COMPACT) + { + auto & data = discriminators.getData(); + data.resize_fill(data.size() + limit_in_granule, discriminators_state->compact_discr); + if (discriminators_state->compact_discr == variant_discriminator) + variant_limit += limit_in_granule; + } + else + { + SerializationNumber().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0); + size_t start = discriminators_data.size() - limit_in_granule; + for (size_t i = start; i != discriminators_data.size(); ++i) + variant_limit += (discriminators_data[i] == variant_discriminator); + } + + discriminators_state->remaining_rows_in_granule -= limit_in_granule; + limit -= limit_in_granule; + } + + return variant_limit; +} + void SerializationVariantElement::addVariantToPath(DB::ISerialization::SubstreamPath & path) const { path.push_back(Substream::VariantElements); @@ -214,17 +299,19 @@ SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator( const ColumnPtr & local_discriminators_, const String & variant_element_name_, ColumnVariant::Discriminator global_variant_discriminator_, - ColumnVariant::Discriminator local_variant_discriminator_) + ColumnVariant::Discriminator local_variant_discriminator_, + bool make_nullable_) : local_discriminators(local_discriminators_) , variant_element_name(variant_element_name_) , global_variant_discriminator(global_variant_discriminator_) , local_variant_discriminator(local_variant_discriminator_) + , make_nullable(make_nullable_) { } DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::DataTypePtr & prev) const { - return makeNullableOrLowCardinalityNullableSafe(prev); + return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev; } SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::SerializationPtr & prev) const @@ -237,12 +324,12 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: /// Case when original Variant column contained only one non-empty variant and no NULLs. /// In this case just use this variant. if (prev->size() == local_discriminators->size()) - return makeNullableOrLowCardinalityNullableSafe(prev); + return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev; /// If this variant is empty, fill result column with default values. if (prev->empty()) { - auto res = makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty(); + auto res = make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty() : prev->cloneEmpty(); res->insertManyDefaults(local_discriminators->size()); return res; } @@ -257,16 +344,16 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: /// Now we can create new column from null-map and variant column using IColumn::expand. auto res_column = IColumn::mutate(prev); - /// Special case for LowCardinality. We want the result to be LowCardinality(Nullable), + /// Special case for LowCardinality when we want the result to be LowCardinality(Nullable), /// but we don't have a good way to apply null-mask for LowCardinality(), so, we first /// convert our column to LowCardinality(Nullable()) and then use expand which will /// fill rows with 0 in mask with default value (that is NULL). - if (prev->lowCardinality()) + if (make_nullable && prev->lowCardinality()) res_column = assert_cast(*res_column).cloneNullable(); res_column->expand(null_map, /*inverted = */ true); - if (res_column->canBeInsideNullable()) + if (make_nullable && prev->canBeInsideNullable()) { auto null_map_col = ColumnUInt8::create(); null_map_col->getData() = std::move(null_map); diff --git a/src/DataTypes/Serializations/SerializationVariantElement.h b/src/DataTypes/Serializations/SerializationVariantElement.h index 0ce0a72e250..69101aea0f5 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.h +++ b/src/DataTypes/Serializations/SerializationVariantElement.h @@ -9,6 +9,7 @@ namespace DB { class SerializationVariant; +class SerializationVariantElementNullMap; /// Serialization for Variant element when we read it as a subcolumn. class SerializationVariantElement final : public SerializationWrapper @@ -66,12 +67,14 @@ public: const String variant_element_name; const ColumnVariant::Discriminator global_variant_discriminator; const ColumnVariant::Discriminator local_variant_discriminator; + bool make_nullable; VariantSubcolumnCreator( const ColumnPtr & local_discriminators_, const String & variant_element_name_, ColumnVariant::Discriminator global_variant_discriminator_, - ColumnVariant::Discriminator local_variant_discriminator_); + ColumnVariant::Discriminator local_variant_discriminator_, + bool make_nullable_); DataTypePtr create(const DataTypePtr & prev) const override; ColumnPtr create(const ColumnPtr & prev) const override; @@ -79,6 +82,18 @@ public: }; private: friend SerializationVariant; + friend SerializationVariantElementNullMap; + + struct DeserializeBinaryBulkStateVariantElement; + + static size_t deserializeCompactDiscriminators( + ColumnPtr & discriminators_column, + ColumnVariant::Discriminator variant_discriminator, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStatePtr & discriminators_state_, + const ISerialization * serialization); void addVariantToPath(SubstreamPath & path) const; void removeVariantFromPath(SubstreamPath & path) const; diff --git a/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp b/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp new file mode 100644 index 00000000000..f30da4fecf9 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp @@ -0,0 +1,190 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +struct DeserializeBinaryBulkStateVariantElementNullMap : public ISerialization::DeserializeBinaryBulkState +{ + /// During deserialization discriminators streams can be shared. + /// For example we can read several variant elements together: "select v.UInt32, v.String.null from table", + /// or we can read the whole variant and some of variant elements or their subcolumns: "select v, v.UInt32.null from table". + /// To read the same column from the same stream more than once we use substream cache, + /// but this cache stores the whole column, not only the current range. + /// During deserialization of variant elements or their subcolumns discriminators column is not stored + /// in the result column, so we need to store them inside deserialization state, so we can use + /// substream cache correctly. + ColumnPtr discriminators; + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; +}; + +void SerializationVariantElementNullMap::enumerateStreams( + DB::ISerialization::EnumerateStreamsSettings & settings, + const DB::ISerialization::StreamCallback & callback, + const DB::ISerialization::SubstreamData &) const +{ + /// We will need stream for discriminators during deserialization. + settings.path.push_back(Substream::VariantDiscriminators); + callback(settings.path); + settings.path.pop_back(); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkStatePrefix( + const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const +{ + DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) + return; + + auto variant_element_null_map_state = std::make_shared(); + variant_element_null_map_state->discriminators_state = std::move(discriminators_state); + state = std::move(variant_element_null_map_state); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkWithMultipleStreams( + const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & result_column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const +{ + /// Deserialize discriminators from Variant column. + settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariantElementNullMap * variant_element_null_map_state = nullptr; + std::optional variant_limit; + if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) + { + variant_element_null_map_state = checkAndGetState(state); + variant_element_null_map_state->discriminators = cached_discriminators; + } + else if (auto * discriminators_stream = settings.getter(settings.path)) + { + variant_element_null_map_state = checkAndGetState(state); + auto * discriminators_state = checkAndGetState( + variant_element_null_map_state->discriminators_state); + + /// If we started to read a new column, reinitialize discriminators column in deserialization state. + if (!variant_element_null_map_state->discriminators || result_column->empty()) + variant_element_null_map_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); + + /// Deserialize discriminators according to serialization mode. + if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC) + SerializationNumber().deserializeBinaryBulk( + *variant_element_null_map_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); + else + variant_limit = SerializationVariantElement::deserializeCompactDiscriminators( + variant_element_null_map_state->discriminators, + variant_discriminator, + limit, + discriminators_stream, + settings.continuous_reading, + variant_element_null_map_state->discriminators_state, + this); + + addToSubstreamsCache(cache, settings.path, variant_element_null_map_state->discriminators); + } + else + { + /// There is no such stream or cached data, it means that there is no Variant column in this part (it could happen after alter table add column). + /// In such cases columns are filled with default values, but for null-map column default value should be 1, not 0. Fill column with 1 here instead. + MutableColumnPtr mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + data.resize_fill(data.size() + limit, 1); + settings.path.pop_back(); + return; + } + settings.path.pop_back(); + + MutableColumnPtr mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + /// Check if there are no such variant in read range. + if (variant_limit && *variant_limit == 0) + { + data.resize_fill(data.size() + limit, 1); + } + /// Check if there is only our variant in read range. + else if (variant_limit && *variant_limit == limit) + { + data.resize_fill(data.size() + limit, 0); + } + /// Iterate through new discriminators to calculate the null map of our variant. + else + { + const auto & discriminators_data + = assert_cast(*variant_element_null_map_state->discriminators).getData(); + size_t discriminators_offset = variant_element_null_map_state->discriminators->size() - limit; + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + data.push_back(discriminators_data[i] != variant_discriminator); + } +} + +SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::VariantNullMapSubcolumnCreator( + const ColumnPtr & local_discriminators_, + const String & variant_element_name_, + ColumnVariant::Discriminator global_variant_discriminator_, + ColumnVariant::Discriminator local_variant_discriminator_) + : local_discriminators(local_discriminators_) + , variant_element_name(variant_element_name_) + , global_variant_discriminator(global_variant_discriminator_) + , local_variant_discriminator(local_variant_discriminator_) +{ +} + +DataTypePtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::DataTypePtr &) const +{ + return std::make_shared(); +} + +SerializationPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::SerializationPtr &) const +{ + return std::make_shared(variant_element_name, global_variant_discriminator); +} + +ColumnPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::ColumnPtr &) const +{ + /// Iterate through discriminators and create null-map for our variant. + auto null_map_col = ColumnUInt8::create(); + auto & null_map_data = null_map_col->getData(); + null_map_data.reserve(local_discriminators->size()); + const auto & local_discriminators_data = assert_cast(*local_discriminators).getData(); + for (auto local_discr : local_discriminators_data) + null_map_data.push_back(local_discr != local_variant_discriminator); + + return null_map_col; +} + + +} diff --git a/src/DataTypes/Serializations/SerializationVariantElementNullMap.h b/src/DataTypes/Serializations/SerializationVariantElementNullMap.h new file mode 100644 index 00000000000..cd81b445189 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationVariantElementNullMap.h @@ -0,0 +1,107 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +class SerializationVariant; +class SerializationVariantElement; + +/// Serialization for Variant element null map when we read it as a subcolumn. +/// For example, variant.UInt64.null. +/// It requires separate serialization because there is no actual Nullable column +/// and we should construct null map from variant discriminators. +/// The implementation of deserializeBinaryBulk* methods is similar to SerializationVariantElement, +/// but differs in that there is no need to read the actual data of the variant, only discriminators. +class SerializationVariantElementNullMap final : public SimpleTextSerialization +{ +public: + SerializationVariantElementNullMap(const String & variant_element_name_, ColumnVariant::Discriminator variant_discriminator_) + : variant_element_name(variant_element_name_), variant_discriminator(variant_discriminator_) + { + } + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; + + void serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); } + bool tryDeserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); } + + struct VariantNullMapSubcolumnCreator : public ISubcolumnCreator + { + const ColumnPtr local_discriminators; + const String variant_element_name; + const ColumnVariant::Discriminator global_variant_discriminator; + const ColumnVariant::Discriminator local_variant_discriminator; + + VariantNullMapSubcolumnCreator( + const ColumnPtr & local_discriminators_, + const String & variant_element_name_, + ColumnVariant::Discriminator global_variant_discriminator_, + ColumnVariant::Discriminator local_variant_discriminator_); + + DataTypePtr create(const DataTypePtr & prev) const override; + ColumnPtr create(const ColumnPtr & prev) const override; + SerializationPtr create(const SerializationPtr & prev) const override; + }; +private: + [[noreturn]] static void throwNoSerialization() + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Text/binary serialization is not implemented for variant element null map subcolumn"); + } + + friend SerializationVariant; + friend SerializationVariantElement; + + /// To be able to deserialize Variant element null map as a subcolumn + /// we need variant element type name and global discriminator. + String variant_element_name; + ColumnVariant::Discriminator variant_discriminator; + +}; + +} diff --git a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp index 0ae325871fb..033a6ea8a4a 100644 --- a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp +++ b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp @@ -72,8 +72,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) DataTypePtr type = DataTypeFactory::instance().get(data_type); FormatSettings settings; - settings.max_binary_string_size = 100; - settings.max_binary_array_size = 100; + settings.binary.max_binary_string_size = 100; + settings.binary.max_binary_string_size = 100; Field field; type->getDefaultSerialization()->deserializeBinary(field, in, settings); diff --git a/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp b/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp new file mode 100644 index 00000000000..4d0bfc67183 --- /dev/null +++ b/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp @@ -0,0 +1,129 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace DB; + +namespace DB::ErrorCodes +{ +extern const int UNSUPPORTED_METHOD; +} + + +void check(const DataTypePtr & type) +{ +// std::cerr << "Check " << type->getName() << "\n"; + WriteBufferFromOwnString ostr; + encodeDataType(type, ostr); + ReadBufferFromString istr(ostr.str()); + DataTypePtr decoded_type = decodeDataType(istr); + ASSERT_TRUE(istr.eof()); + ASSERT_EQ(type->getName(), decoded_type->getName()); + ASSERT_TRUE(type->equals(*decoded_type)); +} + +GTEST_TEST(DataTypesBinaryEncoding, EncodeAndDecode) +{ + tryRegisterAggregateFunctions(); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared("EST")); + check(std::make_shared("CET")); + check(std::make_shared(3)); + check(std::make_shared(3, "EST")); + check(std::make_shared(3, "CET")); + check(std::make_shared()); + check(std::make_shared(10)); + check(DataTypeFactory::instance().get("Enum8('a' = 1, 'b' = 2, 'c' = 3, 'd' = -128)")); + check(DataTypeFactory::instance().get("Enum16('a' = 1, 'b' = 2, 'c' = 3, 'd' = -1000)")); + check(std::make_shared(3, 6)); + check(std::make_shared(3, 6)); + check(std::make_shared(3, 6)); + check(std::make_shared(3, 6)); + check(std::make_shared()); + check(DataTypeFactory::instance().get("Array(UInt32)")); + check(DataTypeFactory::instance().get("Array(Array(Array(UInt32)))")); + check(DataTypeFactory::instance().get("Tuple(UInt32, String, UUID)")); + check(DataTypeFactory::instance().get("Tuple(UInt32, String, Tuple(UUID, Date, IPv4))")); + check(DataTypeFactory::instance().get("Tuple(c1 UInt32, c2 String, c3 UUID)")); + check(DataTypeFactory::instance().get("Tuple(c1 UInt32, c2 String, c3 Tuple(c4 UUID, c5 Date, c6 IPv4))")); + check(std::make_shared()); + check(std::make_shared(IntervalKind::Kind::Nanosecond)); + check(std::make_shared(IntervalKind::Kind::Microsecond)); + check(DataTypeFactory::instance().get("Nullable(UInt32)")); + check(DataTypeFactory::instance().get("Nullable(Nothing)")); + check(DataTypeFactory::instance().get("Nullable(UUID)")); + check(std::make_shared( + DataTypes{ + std::make_shared(), + std::make_shared(), + DataTypeFactory::instance().get("Array(Array(Array(UInt32)))")}, + DataTypeFactory::instance().get("Tuple(c1 UInt32, c2 String, c3 UUID)"))); + DataTypes argument_types = {std::make_shared()}; + Array parameters = {Field(0.1), Field(0.2)}; + AggregateFunctionProperties properties; + AggregateFunctionPtr function = AggregateFunctionFactory::instance().get("quantiles", NullsAction::EMPTY, argument_types, parameters, properties); + check(std::make_shared(function, argument_types, parameters)); + check(std::make_shared(function, argument_types, parameters, 2)); + check(DataTypeFactory::instance().get("AggregateFunction(sum, UInt64)")); + check(DataTypeFactory::instance().get("AggregateFunction(quantiles(0.5, 0.9), UInt64)")); + check(DataTypeFactory::instance().get("AggregateFunction(sequenceMatch('(?1)(?2)'), Date, UInt8, UInt8)")); + check(DataTypeFactory::instance().get("AggregateFunction(sumMapFiltered([1, 4, 8]), Array(UInt64), Array(UInt64))")); + check(DataTypeFactory::instance().get("LowCardinality(UInt32)")); + check(DataTypeFactory::instance().get("LowCardinality(Nullable(String))")); + check(DataTypeFactory::instance().get("Map(String, UInt32)")); + check(DataTypeFactory::instance().get("Map(String, Map(String, Map(String, UInt32)))")); + check(std::make_shared()); + check(std::make_shared()); + check(DataTypeFactory::instance().get("Variant(String, UInt32, Date32)")); + check(std::make_shared()); + check(std::make_shared(10)); + check(std::make_shared(255)); + check(DataTypeFactory::instance().get("Bool")); + check(DataTypeFactory::instance().get("SimpleAggregateFunction(sum, UInt64)")); + check(DataTypeFactory::instance().get("SimpleAggregateFunction(maxMap, Tuple(Array(UInt32), Array(UInt32)))")); + check(DataTypeFactory::instance().get("SimpleAggregateFunction(groupArrayArray(19), Array(UInt64))")); + check(DataTypeFactory::instance().get("Nested(a UInt32, b UInt32)")); + check(DataTypeFactory::instance().get("Nested(a UInt32, b Nested(c String, d Nested(e Date)))")); + check(DataTypeFactory::instance().get("Ring")); + check(DataTypeFactory::instance().get("Point")); + check(DataTypeFactory::instance().get("Polygon")); + check(DataTypeFactory::instance().get("MultiPolygon")); + check(DataTypeFactory::instance().get("Tuple(Map(LowCardinality(String), Array(AggregateFunction(2, quantiles(0.1, 0.2), Float32))), Array(Array(Tuple(UInt32, Tuple(a Map(String, String), b Nullable(Date), c Variant(Tuple(g String, d Array(UInt32)), Date, Map(String, String)))))))")); +} diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index c85e8f5688a..d81dc7a76d8 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp index 40234abb20f..67bce915168 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.cpp +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -211,6 +212,13 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction qualified_name.database = table_identifier->getDatabaseName(); qualified_name.table = table_identifier->shortName(); } + else if (arg->as()) + { + /// Allow IN subquery. + /// Do not add tables from the subquery into dependencies, + /// because CREATE will succeed anyway. + return; + } else { assert(false); diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index ccab72cfbae..9e7004e72a9 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -12,10 +13,11 @@ #include #include #include -#include "Common/logger_useful.h" +#include #include #include #include +#include namespace fs = std::filesystem; @@ -106,12 +108,24 @@ void DatabaseAtomic::attachTable(ContextPtr /* context_ */, const String & name, StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & name) { + // it is important to call the destructors of not_in_use without + // locked mutex to avoid potential deadlock. DetachedTables not_in_use; - std::lock_guard lock(mutex); - auto table = DatabaseOrdinary::detachTableUnlocked(name); - table_name_to_path.erase(name); - detached_tables.emplace(table->getStorageID().uuid, table); - not_in_use = cleanupDetachedTables(); + StoragePtr table; + { + std::lock_guard lock(mutex); + table = DatabaseOrdinary::detachTableUnlocked(name); + table_name_to_path.erase(name); + detached_tables.emplace(table->getStorageID().uuid, table); + not_in_use = cleanupDetachedTables(); + } + + if (!not_in_use.empty()) + { + not_in_use.clear(); + LOG_DEBUG(log, "Finished removing not used detached tables"); + } + return table; } @@ -397,7 +411,7 @@ DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables() LOG_DEBUG(log, "There are {} detached tables. Start searching non used tables.", detached_tables.size()); while (it != detached_tables.end()) { - if (it->second.unique()) + if (isSharedPtrUnique(it->second)) { not_in_use.emplace(it->first, it->second); it = detached_tables.erase(it); diff --git a/src/Databases/DatabaseDictionary.cpp b/src/Databases/DatabaseDictionary.cpp index adb9a659fcd..52196e75c4a 100644 --- a/src/Databases/DatabaseDictionary.cpp +++ b/src/Databases/DatabaseDictionary.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 3a4c1423f7c..05a5e057c55 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace fs = std::filesystem; diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index b27a816a60d..31701e665a1 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Databases/DatabaseHDFS.cpp b/src/Databases/DatabaseHDFS.cpp index 060991d1290..eccaae5f22e 100644 --- a/src/Databases/DatabaseHDFS.cpp +++ b/src/Databases/DatabaseHDFS.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 233db07cd68..da942cebf8f 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -305,7 +306,7 @@ try String table_name = expired_tables.front().table_name; auto it = tables_cache.find(table_name); - if (!it->second.table || it->second.table.unique()) + if (!it->second.table || isSharedPtrUnique(it->second.table)) { LOG_DEBUG(log, "Drop table {} from cache.", backQuote(it->first)); it->second.table.reset(); diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index b8154372116..07a250e72c7 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -28,6 +28,7 @@ #include #include #include +#include namespace fs = std::filesystem; diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 7d4bb07e8ef..6555c4444e2 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 4ca9afc49eb..7ce2859e962 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 31d6f7876a8..1ef88dc03bc 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace fs = std::filesystem; diff --git a/src/Databases/DatabaseS3.cpp b/src/Databases/DatabaseS3.cpp index 1589cc1c75d..2b2d978a846 100644 --- a/src/Databases/DatabaseS3.cpp +++ b/src/Databases/DatabaseS3.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 1c82131af0d..7aa29018f4d 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -2,6 +2,7 @@ #if USE_MYSQL # include +# include # include # include # include @@ -29,6 +30,7 @@ # include # include # include +# include # include # include # include @@ -354,7 +356,7 @@ void DatabaseMySQL::cleanOutdatedTables() { for (auto iterator = outdated_tables.begin(); iterator != outdated_tables.end();) { - if (!iterator->unique()) + if (!isSharedPtrUnique(*iterator)) ++iterator; else { diff --git a/src/Databases/MySQL/FetchTablesColumnsList.cpp b/src/Databases/MySQL/FetchTablesColumnsList.cpp index e78f4aa2234..0828438cad2 100644 --- a/src/Databases/MySQL/FetchTablesColumnsList.cpp +++ b/src/Databases/MySQL/FetchTablesColumnsList.cpp @@ -2,6 +2,7 @@ #if USE_MYSQL #include +#include #include #include #include diff --git a/src/Databases/MySQL/FetchTablesColumnsList.h b/src/Databases/MySQL/FetchTablesColumnsList.h index 736a0ffd607..ec12cdc73b0 100644 --- a/src/Databases/MySQL/FetchTablesColumnsList.h +++ b/src/Databases/MySQL/FetchTablesColumnsList.h @@ -12,11 +12,12 @@ #include #include -#include namespace DB { +struct Settings; + std::map fetchTablesColumnsList( mysqlxx::PoolWithFailover & pool, const String & database_name, diff --git a/src/Databases/MySQL/MaterializeMetadata.h b/src/Databases/MySQL/MaterializeMetadata.h index e78b7132c8d..36d3fb569d8 100644 --- a/src/Databases/MySQL/MaterializeMetadata.h +++ b/src/Databases/MySQL/MaterializeMetadata.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 7ab4235feeb..2c342755337 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -532,13 +533,17 @@ static inline void dumpDataForTables( bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & metadata) { bool opened_transaction = false; - mysqlxx::PoolWithFailover::Entry connection; while (!isCancelled()) { try { - connection = pool.tryGet(); + mysqlxx::PoolWithFailover::Entry connection = pool.tryGet(); + SCOPE_EXIT({ + if (opened_transaction) + connection->query("ROLLBACK").execute(); + }); + if (connection.isNull()) { if (settings->max_wait_time_when_mysql_unavailable < 0) @@ -602,9 +607,6 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta { tryLogCurrentException(log); - if (opened_transaction) - connection->query("ROLLBACK").execute(); - if (settings->max_wait_time_when_mysql_unavailable < 0) throw; @@ -716,6 +718,16 @@ static void writeFieldsToColumn( null_map_column->insertValue(0); } + else + { + // Column is not null but field is null. It's possible due to overrides + if (field.isNull()) + { + column_to.insertDefault(); + return false; + } + } + return true; }; @@ -791,7 +803,7 @@ static void writeFieldsToColumn( if (write_data_to_null_map(value, index)) { - const String & data = value.get(); + const String & data = value.safeGet(); casted_string_column->insertData(data.data(), data.size()); } } diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index 7ce03c74c58..6b0548b85c7 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -529,7 +530,12 @@ void registerDatabaseMaterializedPostgreSQL(DatabaseFactory & factory) } auto connection_info = postgres::formatConnectionString( - configuration.database, configuration.host, configuration.port, configuration.username, configuration.password); + configuration.database, + configuration.host, + configuration.port, + configuration.username, + configuration.password, + args.context->getSettingsRef().postgresql_connection_attempt_timeout); auto postgresql_replica_settings = std::make_unique(); if (engine_define->settings) diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index 136fb7fd6d2..a846e23cd4f 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include namespace fs = std::filesystem; @@ -545,8 +546,9 @@ void registerDatabasePostgreSQL(DatabaseFactory & factory) configuration, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); return std::make_shared( args.context, diff --git a/src/Databases/SQLite/DatabaseSQLite.cpp b/src/Databases/SQLite/DatabaseSQLite.cpp index e758ea35de5..132a978140c 100644 --- a/src/Databases/SQLite/DatabaseSQLite.cpp +++ b/src/Databases/SQLite/DatabaseSQLite.cpp @@ -3,6 +3,7 @@ #if USE_SQLITE #include +#include #include #include #include diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index 2c0f7653aff..f3b32402c20 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -1,6 +1,7 @@ #include "DirectDictionary.h" #include +#include #include #include diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index d8111afdc19..217a67453e4 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -8,6 +8,8 @@ #include #include +#include + #include #include #include diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index dae8ec06d30..663c63dd6c6 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -1,12 +1,13 @@ #include "HTTPDictionarySource.h" +#include #include #include #include #include #include #include -#include #include +#include #include #include #include diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 9b194acf87f..d7d50dfb0a6 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index 206516edab0..41fafcc162b 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -1,11 +1,11 @@ #include "IPAddressDictionary.h" -#include -#include + #include #include #include #include #include +#include #include #include #include @@ -23,6 +23,9 @@ #include #include +#include +#include + namespace DB { diff --git a/src/Dictionaries/PolygonDictionaryImplementations.cpp b/src/Dictionaries/PolygonDictionaryImplementations.cpp index e21fadb0e7e..84abeed2731 100644 --- a/src/Dictionaries/PolygonDictionaryImplementations.cpp +++ b/src/Dictionaries/PolygonDictionaryImplementations.cpp @@ -8,6 +8,7 @@ #include #include +#include #include diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index c7401386e40..f62a9a009d8 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -2,6 +2,7 @@ #include #include +#include #include "DictionarySourceFactory.h" #include "registerDictionaries.h" @@ -205,8 +206,9 @@ void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) configuration.replicas_configurations, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); PostgreSQLDictionarySource::Configuration dictionary_configuration { diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 830f9d09364..2522de60183 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -1002,9 +1003,9 @@ void registerDictionaryRegExpTree(DictionaryFactory & factory) dict_struct, std::move(source_ptr), configuration, - context->getSettings().regexp_dict_allow_hyperscan, - context->getSettings().regexp_dict_flag_case_insensitive, - context->getSettings().regexp_dict_flag_dotall); + context->getSettingsRef().regexp_dict_allow_hyperscan, + context->getSettingsRef().regexp_dict_flag_case_insensitive, + context->getSettingsRef().regexp_dict_flag_dotall); }; factory.registerLayout("regexp_tree", create_layout, true); diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 1ebfc4a29b0..590d000f16e 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -10,11 +10,13 @@ #include #include #include +#include #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" #include "readInvalidateQuery.h" #include "registerDictionaries.h" #include +#include #include #include #include "config.h" @@ -239,7 +241,7 @@ void registerDictionarySourceXDBC(DictionarySourceFactory & factory) #if USE_ODBC BridgeHelperPtr bridge = std::make_shared>( global_context, - global_context->getSettings().http_receive_timeout, + global_context->getSettingsRef().http_receive_timeout, config.getString(config_prefix + ".odbc.connection_string"), config.getBool(config_prefix + ".settings.odbc_bridge_use_connection_pooling", global_context->getSettingsRef().odbc_bridge_use_connection_pooling)); diff --git a/src/Dictionaries/registerCacheDictionaries.cpp b/src/Dictionaries/registerCacheDictionaries.cpp index b79261955ff..f2d027575f5 100644 --- a/src/Dictionaries/registerCacheDictionaries.cpp +++ b/src/Dictionaries/registerCacheDictionaries.cpp @@ -2,6 +2,7 @@ #include "CacheDictionaryStorage.h" #include "SSDCacheDictionaryStorage.h" #include +#include #include #include diff --git a/src/Dictionaries/registerHashedDictionary.cpp b/src/Dictionaries/registerHashedDictionary.cpp index 5fc4f5d5cb6..068b71170cc 100644 --- a/src/Dictionaries/registerHashedDictionary.cpp +++ b/src/Dictionaries/registerHashedDictionary.cpp @@ -1,8 +1,8 @@ +#include #include #include #include #include - #include namespace DB diff --git a/src/Dictionaries/registerRangeHashedDictionary.cpp b/src/Dictionaries/registerRangeHashedDictionary.cpp index 8123b811198..bc142a6bddc 100644 --- a/src/Dictionaries/registerRangeHashedDictionary.cpp +++ b/src/Dictionaries/registerRangeHashedDictionary.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/src/Disks/DiskFactory.cpp b/src/Disks/DiskFactory.cpp index de7ee5a74f4..4aa7f6ff564 100644 --- a/src/Disks/DiskFactory.cpp +++ b/src/Disks/DiskFactory.cpp @@ -27,7 +27,8 @@ DiskPtr DiskFactory::create( ContextPtr context, const DisksMap & map, bool attach, - bool custom_disk) const + bool custom_disk, + const std::unordered_set & skip_types) const { const auto disk_type = config.getString(config_prefix + ".type", "local"); @@ -38,6 +39,11 @@ DiskPtr DiskFactory::create( "DiskFactory: the disk '{}' has unknown disk type: {}", name, disk_type); } + if (skip_types.contains(found->first)) + { + return nullptr; + } + const auto & disk_creator = found->second; return disk_creator(name, config, config_prefix, context, map, attach, custom_disk); } diff --git a/src/Disks/DiskFactory.h b/src/Disks/DiskFactory.h index d03ffa6a40f..044ce81dbae 100644 --- a/src/Disks/DiskFactory.h +++ b/src/Disks/DiskFactory.h @@ -42,7 +42,8 @@ public: ContextPtr context, const DisksMap & map, bool attach = false, - bool custom_disk = false) const; + bool custom_disk = false, + const std::unordered_set & skip_types = {}) const; private: using DiskTypeRegistry = std::unordered_map; diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index a9260a249dd..f45d12618bf 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -7,7 +7,6 @@ #include #include -#include namespace DB { @@ -27,7 +26,8 @@ void DiskSelector::assertInitialized() const } -void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator) +void DiskSelector::initialize( + const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator) { Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); @@ -36,6 +36,8 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, constexpr auto default_disk_name = "default"; bool has_default_disk = false; + constexpr auto local_disk_name = "local"; + bool has_local_disk = false; for (const auto & disk_name : keys) { if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII)) @@ -44,21 +46,31 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, if (disk_name == default_disk_name) has_default_disk = true; + if (disk_name == local_disk_name) + has_local_disk = true; + const auto disk_config_prefix = config_prefix + "." + disk_name; if (disk_validator && !disk_validator(config, disk_config_prefix, disk_name)) continue; - - disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks)); + auto created_disk + = factory.create(disk_name, config, disk_config_prefix, context, disks, /*attach*/ false, /*custom_disk*/ false, skip_types); + if (created_disk.get()) + { + disks.emplace(disk_name, std::move(created_disk)); + } } if (!has_default_disk) { disks.emplace( - default_disk_name, - std::make_shared( - default_disk_name, context->getPath(), 0, context, config, config_prefix)); + default_disk_name, std::make_shared(default_disk_name, context->getPath(), 0, context, config, config_prefix)); } + if (!has_local_disk && (context->getApplicationType() == Context::ApplicationType::DISKS)) + { + throw_away_local_on_update = true; + disks.emplace(local_disk_name, std::make_shared(local_disk_name, "/", 0, context, config, config_prefix)); + } is_initialized = true; } @@ -76,6 +88,7 @@ DiskSelectorPtr DiskSelector::updateFromConfig( std::shared_ptr result = std::make_shared(*this); constexpr auto default_disk_name = "default"; + constexpr auto local_disk_name = "local"; DisksMap old_disks_minus_new_disks(result->getDisksMap()); for (const auto & disk_name : keys) @@ -86,7 +99,12 @@ DiskSelectorPtr DiskSelector::updateFromConfig( auto disk_config_prefix = config_prefix + "." + disk_name; if (!result->getDisksMap().contains(disk_name)) { - result->addToDiskMap(disk_name, factory.create(disk_name, config, disk_config_prefix, context, result->getDisksMap())); + auto created_disk = factory.create( + disk_name, config, disk_config_prefix, context, result->getDisksMap(), /*attach*/ false, /*custom_disk*/ false, skip_types); + if (created_disk) + { + result->addToDiskMap(disk_name, created_disk); + } } else { @@ -99,6 +117,10 @@ DiskSelectorPtr DiskSelector::updateFromConfig( } old_disks_minus_new_disks.erase(default_disk_name); + if (throw_away_local_on_update) + { + old_disks_minus_new_disks.erase(local_disk_name); + } if (!old_disks_minus_new_disks.empty()) { diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index 6669b428158..49a1be5cf50 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -20,7 +20,7 @@ class DiskSelector public: static constexpr auto TMP_INTERNAL_DISK_PREFIX = "__tmp_internal_"; - DiskSelector() = default; + explicit DiskSelector(std::unordered_set skip_types_ = {}) : skip_types(skip_types_) { } DiskSelector(const DiskSelector & from) = default; using DiskValidator = std::function; @@ -48,6 +48,10 @@ private: bool is_initialized = false; void assertInitialized() const; + + const std::unordered_set skip_types; + + bool throw_away_local_on_update = false; }; } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index c77709c27eb..bb9761a3905 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -78,7 +78,6 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c std::unique_ptr buf; -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD if (with_file_cache) { auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); @@ -96,7 +95,6 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c /* read_until_position */std::nullopt, cache_log); } -#endif /// Can't wrap CachedOnDiskReadBufferFromFile in CachedInMemoryReadBufferFromFile because the /// former doesn't support seeks. diff --git a/src/Disks/IO/ReadBufferFromWebServer.cpp b/src/Disks/IO/ReadBufferFromWebServer.cpp index 7c5de8a13de..837452aa9f2 100644 --- a/src/Disks/IO/ReadBufferFromWebServer.cpp +++ b/src/Disks/IO/ReadBufferFromWebServer.cpp @@ -1,9 +1,12 @@ #include "ReadBufferFromWebServer.h" -#include +#include +#include +#include #include #include -#include +#include + #include diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index a2d21cf49c2..60fa2997c50 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -14,6 +14,15 @@ namespace ProfileEvents { extern const Event RemoteWriteThrottlerBytes; extern const Event RemoteWriteThrottlerSleepMicroseconds; + + extern const Event AzureUpload; + extern const Event AzureStageBlock; + extern const Event AzureCommitBlockList; + + extern const Event DiskAzureUpload; + extern const Event DiskAzureStageBlock; + extern const Event DiskAzureCommitBlockList; + } namespace DB @@ -30,7 +39,7 @@ struct WriteBufferFromAzureBlobStorage::PartData size_t data_size = 0; }; -BufferAllocationPolicyPtr createBufferAllocationPolicy(const AzureObjectStorageSettings & settings) +BufferAllocationPolicyPtr createBufferAllocationPolicy(const AzureBlobStorage::RequestSettings & settings) { BufferAllocationPolicy::Settings allocation_settings; allocation_settings.strict_size = settings.strict_upload_part_size; @@ -48,7 +57,7 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( const String & blob_path_, size_t buf_size_, const WriteSettings & write_settings_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_) : WriteBufferFromFileBase(buf_size_, nullptr, 0) , log(getLogger("WriteBufferFromAzureBlobStorage")) @@ -134,6 +143,10 @@ void WriteBufferFromAzureBlobStorage::preFinalize() /// then we use single part upload instead of multi part upload if (block_ids.empty() && detached_part_data.size() == 1 && detached_part_data.front().data_size <= max_single_part_upload_size) { + ProfileEvents::increment(ProfileEvents::AzureUpload); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureUpload); + auto part_data = std::move(detached_part_data.front()); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(part_data.memory.data()), part_data.data_size); @@ -164,6 +177,10 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() if (!block_ids.empty()) { auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + ProfileEvents::increment(ProfileEvents::AzureCommitBlockList); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureCommitBlockList); + execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); } @@ -269,6 +286,10 @@ void WriteBufferFromAzureBlobStorage::writePart(WriteBufferFromAzureBlobStorage: auto & data_block_id = std::get<0>(*worker_data); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + ProfileEvents::increment(ProfileEvents::AzureStageBlock); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureStageBlock); + Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(std::get<1>(*worker_data).memory.data()), data_size); execWithRetry([&](){ block_blob_client.StageBlock(data_block_id, memory_stream); }, max_unexpected_write_error_retries, data_size); }; diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 10fe871a727..3ee497c4e44 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -35,7 +35,7 @@ public: const String & blob_path_, size_t buf_size_, const WriteSettings & write_settings_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); ~WriteBufferFromAzureBlobStorage() override; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp deleted file mode 100644 index 1a5388349f8..00000000000 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ /dev/null @@ -1,272 +0,0 @@ -#include - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace Azure::Storage::Blobs; - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - - -void validateStorageAccountUrl(const String & storage_account_url) -{ - const auto * storage_account_url_pattern_str = R"(http(()|s)://[a-z0-9-.:]+(()|/)[a-z0-9]*(()|/))"; - static const RE2 storage_account_url_pattern(storage_account_url_pattern_str); - - if (!re2::RE2::FullMatch(storage_account_url, storage_account_url_pattern)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Blob Storage URL is not valid, should follow the format: {}, got: {}", storage_account_url_pattern_str, storage_account_url); -} - - -void validateContainerName(const String & container_name) -{ - auto len = container_name.length(); - if (len < 3 || len > 64) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "AzureBlob Storage container name is not valid, should have length between 3 and 64, but has length: {}", len); - - const auto * container_name_pattern_str = R"([a-z][a-z0-9-]+)"; - static const RE2 container_name_pattern(container_name_pattern_str); - - if (!re2::RE2::FullMatch(container_name, container_name_pattern)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "AzureBlob Storage container name is not valid, should follow the format: {}, got: {}", - container_name_pattern_str, container_name); -} - - -AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - String storage_url; - String account_name; - String container_name; - String prefix; - if (config.has(config_prefix + ".endpoint")) - { - String endpoint = config.getString(config_prefix + ".endpoint"); - - /// For some authentication methods account name is not present in the endpoint - /// 'endpoint_contains_account_name' bool is used to understand how to split the endpoint (default : true) - bool endpoint_contains_account_name = config.getBool(config_prefix + ".endpoint_contains_account_name", true); - - size_t pos = endpoint.find("//"); - if (pos == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected '//' in endpoint"); - - if (endpoint_contains_account_name) - { - size_t acc_pos_begin = endpoint.find('/', pos+2); - if (acc_pos_begin == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected account_name in endpoint"); - - storage_url = endpoint.substr(0,acc_pos_begin); - size_t acc_pos_end = endpoint.find('/',acc_pos_begin+1); - - if (acc_pos_end == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); - - account_name = endpoint.substr(acc_pos_begin+1,(acc_pos_end-acc_pos_begin)-1); - - size_t cont_pos_end = endpoint.find('/', acc_pos_end+1); - - if (cont_pos_end != std::string::npos) - { - container_name = endpoint.substr(acc_pos_end+1,(cont_pos_end-acc_pos_end)-1); - prefix = endpoint.substr(cont_pos_end+1); - } - else - { - container_name = endpoint.substr(acc_pos_end+1); - } - } - else - { - size_t cont_pos_begin = endpoint.find('/', pos+2); - - if (cont_pos_begin == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); - - storage_url = endpoint.substr(0,cont_pos_begin); - size_t cont_pos_end = endpoint.find('/',cont_pos_begin+1); - - if (cont_pos_end != std::string::npos) - { - container_name = endpoint.substr(cont_pos_begin+1,(cont_pos_end-cont_pos_begin)-1); - prefix = endpoint.substr(cont_pos_end+1); - } - else - { - container_name = endpoint.substr(cont_pos_begin+1); - } - } - } - else if (config.has(config_prefix + ".connection_string")) - { - storage_url = config.getString(config_prefix + ".connection_string"); - container_name = config.getString(config_prefix + ".container_name"); - } - else if (config.has(config_prefix + ".storage_account_url")) - { - storage_url = config.getString(config_prefix + ".storage_account_url"); - validateStorageAccountUrl(storage_url); - container_name = config.getString(config_prefix + ".container_name"); - } - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `storage_account_url` or `connection_string` or `endpoint` in config"); - - if (!container_name.empty()) - validateContainerName(container_name); - std::optional container_already_exists {}; - if (config.has(config_prefix + ".container_already_exists")) - container_already_exists = {config.getBool(config_prefix + ".container_already_exists")}; - return {storage_url, account_name, container_name, prefix, container_already_exists}; -} - - -template -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & container_name, const BlobClientOptions & client_options) = delete; - -template<> -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & /*container_name*/, const BlobClientOptions & client_options) -{ - return std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_str, client_options)); -} - -template<> -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & container_name, const BlobClientOptions & client_options) -{ - return std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_str, container_name, client_options)); -} - -template -std::unique_ptr getAzureBlobStorageClientWithAuth( - const String & url, - const String & container_name, - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - const Azure::Storage::Blobs::BlobClientOptions & client_options) -{ - std::string connection_str; - if (config.has(config_prefix + ".connection_string")) - connection_str = config.getString(config_prefix + ".connection_string"); - - if (!connection_str.empty()) - return getClientWithConnectionString(connection_str, container_name, client_options); - - if (config.has(config_prefix + ".account_key") && config.has(config_prefix + ".account_name")) - { - auto storage_shared_key_credential = std::make_shared( - config.getString(config_prefix + ".account_name"), - config.getString(config_prefix + ".account_key") - ); - return std::make_unique(url, storage_shared_key_credential, client_options); - } - - if (config.getBool(config_prefix + ".use_workload_identity", false)) - { - auto workload_identity_credential = std::make_shared(); - return std::make_unique(url, workload_identity_credential, client_options); - } - - auto managed_identity_credential = std::make_shared(); - return std::make_unique(url, managed_identity_credential, client_options); -} - -Azure::Storage::Blobs::BlobClientOptions getAzureBlobClientOptions(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - Azure::Core::Http::Policies::RetryOptions retry_options; - retry_options.MaxRetries = config.getUInt(config_prefix + ".max_tries", 10); - retry_options.RetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_initial_backoff_ms", 10)); - retry_options.MaxRetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_max_backoff_ms", 1000)); - - using CurlOptions = Azure::Core::Http::CurlTransportOptions; - CurlOptions curl_options; - curl_options.NoSignal = true; - - if (config.has(config_prefix + ".curl_ip_resolve")) - { - auto value = config.getString(config_prefix + ".curl_ip_resolve"); - if (value == "ipv4") - curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V4; - else if (value == "ipv6") - curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V6; - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value for option 'curl_ip_resolve': {}. Expected one of 'ipv4' or 'ipv6'", value); - } - - Azure::Storage::Blobs::BlobClientOptions client_options; - client_options.Retry = retry_options; - client_options.Transport.Transport = std::make_shared(curl_options); - - client_options.ClickhouseOptions = Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}; - - return client_options; -} - -std::unique_ptr getAzureBlobContainerClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - auto endpoint = processAzureBlobStorageEndpoint(config, config_prefix); - auto container_name = endpoint.container_name; - auto final_url = endpoint.getEndpoint(); - auto client_options = getAzureBlobClientOptions(config, config_prefix); - - if (endpoint.container_already_exists.value_or(false)) - return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix, client_options); - - auto blob_service_client = getAzureBlobStorageClientWithAuth(endpoint.getEndpointWithoutContainer(), container_name, config, config_prefix, client_options); - - try - { - return std::make_unique(blob_service_client->CreateBlobContainer(container_name).Value); - } - catch (const Azure::Storage::StorageException & e) - { - /// If container_already_exists is not set (in config), ignore already exists error. - /// (Conflict - The specified container already exists) - if (!endpoint.container_already_exists.has_value() && e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) - return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix, client_options); - throw; - } -} - -std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) -{ - std::unique_ptr settings = std::make_unique(); - settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", context->getSettings().azure_max_single_part_upload_size); - settings->min_bytes_for_seek = config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024); - settings->max_single_read_retries = config.getInt(config_prefix + ".max_single_read_retries", 3); - settings->max_single_download_retries = config.getInt(config_prefix + ".max_single_download_retries", 3); - settings->list_object_keys_size = config.getInt(config_prefix + ".list_object_keys_size", 1000); - settings->min_upload_part_size = config.getUInt64(config_prefix + ".min_upload_part_size", context->getSettings().azure_min_upload_part_size); - settings->max_upload_part_size = config.getUInt64(config_prefix + ".max_upload_part_size", context->getSettings().azure_max_upload_part_size); - settings->max_single_part_copy_size = config.getUInt64(config_prefix + ".max_single_part_copy_size", context->getSettings().azure_max_single_part_copy_size); - settings->use_native_copy = config.getBool(config_prefix + ".use_native_copy", false); - settings->max_blocks_in_multipart_upload = config.getUInt64(config_prefix + ".max_blocks_in_multipart_upload", 50000); - settings->max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", context->getSettings().azure_max_unexpected_write_error_retries); - settings->max_inflight_parts_for_one_file = config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", context->getSettings().azure_max_inflight_parts_for_one_file); - settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", context->getSettings().azure_strict_upload_part_size); - settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", context->getSettings().azure_upload_part_size_multiply_factor); - settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", context->getSettings().azure_upload_part_size_multiply_parts_count_threshold); - - return settings; -} - -} - -#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h deleted file mode 100644 index e4775a053c1..00000000000 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include - -namespace DB -{ - -struct AzureBlobStorageEndpoint -{ - const String storage_account_url; - const String account_name; - const String container_name; - const String prefix; - const std::optional container_already_exists; - - String getEndpoint() - { - String url = storage_account_url; - if (url.ends_with('/')) - url.pop_back(); - - if (!account_name.empty()) - url += "/" + account_name; - - if (!container_name.empty()) - url += "/" + container_name; - - if (!prefix.empty()) - url += "/" + prefix; - - return url; - } - - String getEndpointWithoutContainer() - { - String url = storage_account_url; - - if (!account_name.empty()) - url += "/" + account_name; - - return url; - } -}; - -std::unique_ptr getAzureBlobContainerClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); - -} - -#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp new file mode 100644 index 00000000000..0aa3b9c40b5 --- /dev/null +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -0,0 +1,352 @@ +#include + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace AzureBlobStorage +{ + +static void validateStorageAccountUrl(const String & storage_account_url) +{ + const auto * storage_account_url_pattern_str = R"(http(()|s)://[a-z0-9-.:]+(()|/)[a-z0-9]*(()|/))"; + static const RE2 storage_account_url_pattern(storage_account_url_pattern_str); + + if (!re2::RE2::FullMatch(storage_account_url, storage_account_url_pattern)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Blob Storage URL is not valid, should follow the format: {}, got: {}", storage_account_url_pattern_str, storage_account_url); +} + +static void validateContainerName(const String & container_name) +{ + auto len = container_name.length(); + if (len < 3 || len > 64) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "AzureBlob Storage container name is not valid, should have length between 3 and 64, but has length: {}", len); + + const auto * container_name_pattern_str = R"([a-z][a-z0-9-]+)"; + static const RE2 container_name_pattern(container_name_pattern_str); + + if (!re2::RE2::FullMatch(container_name, container_name_pattern)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "AzureBlob Storage container name is not valid, should follow the format: {}, got: {}", + container_name_pattern_str, container_name); +} + +static bool isConnectionString(const std::string & candidate) +{ + return !candidate.starts_with("http"); +} + +String ConnectionParams::getConnectionURL() const +{ + if (std::holds_alternative(auth_method)) + { + auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(endpoint.storage_account_url); + return parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl(); + } + + return endpoint.storage_account_url; +} + +std::unique_ptr ConnectionParams::createForService() const +{ + return std::visit([this](const T & auth) + { + if constexpr (std::is_same_v) + return std::make_unique(ServiceClient::CreateFromConnectionString(auth.toUnderType(), client_options)); + else + return std::make_unique(endpoint.getEndpointWithoutContainer(), auth, client_options); + }, auth_method); +} + +std::unique_ptr ConnectionParams::createForContainer() const +{ + return std::visit([this](const T & auth) + { + if constexpr (std::is_same_v) + return std::make_unique(ContainerClient::CreateFromConnectionString(auth.toUnderType(), endpoint.container_name, client_options)); + else + return std::make_unique(endpoint.getEndpoint(), auth, client_options); + }, auth_method); +} + +Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +{ + String storage_url; + String account_name; + String container_name; + String prefix; + + auto get_container_name = [&] + { + if (config.has(config_prefix + ".container_name")) + return config.getString(config_prefix + ".container_name"); + + if (config.has(config_prefix + ".container")) + return config.getString(config_prefix + ".container"); + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `container` or `container_name` parameter in config"); + }; + + if (config.has(config_prefix + ".endpoint")) + { + String endpoint = config.getString(config_prefix + ".endpoint"); + + /// For some authentication methods account name is not present in the endpoint + /// 'endpoint_contains_account_name' bool is used to understand how to split the endpoint (default : true) + bool endpoint_contains_account_name = config.getBool(config_prefix + ".endpoint_contains_account_name", true); + + size_t pos = endpoint.find("//"); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected '//' in endpoint"); + + if (endpoint_contains_account_name) + { + size_t acc_pos_begin = endpoint.find('/', pos + 2); + if (acc_pos_begin == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected account_name in endpoint"); + + storage_url = endpoint.substr(0, acc_pos_begin); + size_t acc_pos_end = endpoint.find('/', acc_pos_begin + 1); + + if (acc_pos_end == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); + + account_name = endpoint.substr(acc_pos_begin + 1, acc_pos_end - acc_pos_begin - 1); + + size_t cont_pos_end = endpoint.find('/', acc_pos_end + 1); + + if (cont_pos_end != std::string::npos) + { + container_name = endpoint.substr(acc_pos_end + 1, cont_pos_end - acc_pos_end - 1); + prefix = endpoint.substr(cont_pos_end + 1); + } + else + { + container_name = endpoint.substr(acc_pos_end + 1); + } + } + else + { + size_t cont_pos_begin = endpoint.find('/', pos + 2); + + if (cont_pos_begin == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); + + storage_url = endpoint.substr(0, cont_pos_begin); + size_t cont_pos_end = endpoint.find('/', cont_pos_begin + 1); + + if (cont_pos_end != std::string::npos) + { + container_name = endpoint.substr(cont_pos_begin + 1,cont_pos_end - cont_pos_begin - 1); + prefix = endpoint.substr(cont_pos_end + 1); + } + else + { + container_name = endpoint.substr(cont_pos_begin + 1); + } + } + } + else if (config.has(config_prefix + ".connection_string")) + { + storage_url = config.getString(config_prefix + ".connection_string"); + container_name = get_container_name(); + } + else if (config.has(config_prefix + ".storage_account_url")) + { + storage_url = config.getString(config_prefix + ".storage_account_url"); + validateStorageAccountUrl(storage_url); + container_name = get_container_name(); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `storage_account_url` or `connection_string` or `endpoint` in config"); + + if (!container_name.empty()) + validateContainerName(container_name); + + std::optional container_already_exists {}; + if (config.has(config_prefix + ".container_already_exists")) + container_already_exists = {config.getBool(config_prefix + ".container_already_exists")}; + + return {storage_url, account_name, container_name, prefix, "", container_already_exists}; +} + +void processURL(const String & url, const String & container_name, Endpoint & endpoint, AuthMethod & auth_method) +{ + endpoint.container_name = container_name; + + if (isConnectionString(url)) + { + endpoint.storage_account_url = url; + auth_method = ConnectionString{url}; + return; + } + + auto pos = url.find('?'); + + /// If conneciton_url does not have '?', then its not SAS + if (pos == std::string::npos) + { + endpoint.storage_account_url = url; + auth_method = std::make_shared(); + } + else + { + endpoint.storage_account_url = url.substr(0, pos); + endpoint.sas_auth = url.substr(pos + 1); + auth_method = std::make_shared(); + } +} + +std::unique_ptr getContainerClient(const ConnectionParams & params, bool readonly) +{ + if (params.endpoint.container_already_exists.value_or(false) || readonly) + return params.createForContainer(); + + try + { + auto service_client = params.createForService(); + return std::make_unique(service_client->CreateBlobContainer(params.endpoint.container_name).Value); + } + catch (const Azure::Storage::StorageException & e) + { + /// If container_already_exists is not set (in config), ignore already exists error. + /// (Conflict - The specified container already exists) + if (!params.endpoint.container_already_exists.has_value() && e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) + return params.createForContainer(); + throw; + } +} + +AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +{ + if (config.has(config_prefix + ".account_key") && config.has(config_prefix + ".account_name")) + { + return std::make_shared( + config.getString(config_prefix + ".account_name"), + config.getString(config_prefix + ".account_key") + ); + } + + if (config.has(config_prefix + ".connection_string")) + return ConnectionString{config.getString(config_prefix + ".connection_string")}; + + if (config.getBool(config_prefix + ".use_workload_identity", false)) + return std::make_shared(); + + return std::make_shared(); +} + +BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk) +{ + Azure::Core::Http::Policies::RetryOptions retry_options; + retry_options.MaxRetries = static_cast(settings.sdk_max_retries); + retry_options.RetryDelay = std::chrono::milliseconds(settings.sdk_retry_initial_backoff_ms); + retry_options.MaxRetryDelay = std::chrono::milliseconds(settings.sdk_retry_max_backoff_ms); + + Azure::Core::Http::CurlTransportOptions curl_options; + curl_options.NoSignal = true; + curl_options.IPResolve = settings.curl_ip_resolve; + + Azure::Storage::Blobs::BlobClientOptions client_options; + client_options.Retry = retry_options; + client_options.Transport.Transport = std::make_shared(curl_options); + client_options.ClickhouseOptions = Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=for_disk}; + + return client_options; +} + +std::unique_ptr getRequestSettings(const Settings & query_settings) +{ + auto settings = std::make_unique(); + + settings->max_single_part_upload_size = query_settings.azure_max_single_part_upload_size; + settings->max_single_read_retries = query_settings.azure_max_single_read_retries; + settings->max_single_download_retries = query_settings.azure_max_single_read_retries; + settings->list_object_keys_size = query_settings.azure_list_object_keys_size; + settings->min_upload_part_size = query_settings.azure_min_upload_part_size; + settings->max_upload_part_size = query_settings.azure_max_upload_part_size; + settings->max_single_part_copy_size = query_settings.azure_max_single_part_copy_size; + settings->max_blocks_in_multipart_upload = query_settings.azure_max_blocks_in_multipart_upload; + settings->max_unexpected_write_error_retries = query_settings.azure_max_unexpected_write_error_retries; + settings->max_inflight_parts_for_one_file = query_settings.azure_max_inflight_parts_for_one_file; + settings->strict_upload_part_size = query_settings.azure_strict_upload_part_size; + settings->upload_part_size_multiply_factor = query_settings.azure_upload_part_size_multiply_factor; + settings->upload_part_size_multiply_parts_count_threshold = query_settings.azure_upload_part_size_multiply_parts_count_threshold; + settings->sdk_max_retries = query_settings.azure_sdk_max_retries; + settings->sdk_retry_initial_backoff_ms = query_settings.azure_sdk_retry_initial_backoff_ms; + settings->sdk_retry_max_backoff_ms = query_settings.azure_sdk_retry_max_backoff_ms; + + return settings; +} + +std::unique_ptr getRequestSettingsForBackup(const Settings & query_settings, bool use_native_copy) +{ + auto settings = getRequestSettings(query_settings); + settings->use_native_copy = use_native_copy; + return settings; +} + +std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +{ + auto settings = std::make_unique(); + const auto & settings_ref = context->getSettingsRef(); + + settings->min_bytes_for_seek = config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024); + settings->use_native_copy = config.getBool(config_prefix + ".use_native_copy", false); + + settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", settings_ref.azure_max_single_part_upload_size); + settings->max_single_read_retries = config.getUInt64(config_prefix + ".max_single_read_retries", settings_ref.azure_max_single_read_retries); + settings->max_single_download_retries = config.getUInt64(config_prefix + ".max_single_download_retries", settings_ref.azure_max_single_read_retries); + settings->list_object_keys_size = config.getUInt64(config_prefix + ".list_object_keys_size", settings_ref.azure_list_object_keys_size); + settings->min_upload_part_size = config.getUInt64(config_prefix + ".min_upload_part_size", settings_ref.azure_min_upload_part_size); + settings->max_upload_part_size = config.getUInt64(config_prefix + ".max_upload_part_size", settings_ref.azure_max_upload_part_size); + settings->max_single_part_copy_size = config.getUInt64(config_prefix + ".max_single_part_copy_size", settings_ref.azure_max_single_part_copy_size); + settings->max_blocks_in_multipart_upload = config.getUInt64(config_prefix + ".max_blocks_in_multipart_upload", settings_ref.azure_max_blocks_in_multipart_upload); + settings->max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", settings_ref.azure_max_unexpected_write_error_retries); + settings->max_inflight_parts_for_one_file = config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", settings_ref.azure_max_inflight_parts_for_one_file); + settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", settings_ref.azure_strict_upload_part_size); + settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", settings_ref.azure_upload_part_size_multiply_factor); + settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", settings_ref.azure_upload_part_size_multiply_parts_count_threshold); + + settings->sdk_max_retries = config.getUInt64(config_prefix + ".max_tries", settings_ref.azure_sdk_max_retries); + settings->sdk_retry_initial_backoff_ms = config.getUInt64(config_prefix + ".retry_initial_backoff_ms", settings_ref.azure_sdk_retry_initial_backoff_ms); + settings->sdk_retry_max_backoff_ms = config.getUInt64(config_prefix + ".retry_max_backoff_ms", settings_ref.azure_sdk_retry_max_backoff_ms); + + if (config.has(config_prefix + ".curl_ip_resolve")) + { + using CurlOptions = Azure::Core::Http::CurlTransportOptions; + + auto value = config.getString(config_prefix + ".curl_ip_resolve"); + if (value == "ipv4") + settings->curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_V4; + else if (value == "ipv6") + settings->curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_V6; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value for option 'curl_ip_resolve': {}. Expected one of 'ipv4' or 'ipv6'", value); + } + + return settings; +} + +} + +} + +#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h new file mode 100644 index 00000000000..bb2f0270924 --- /dev/null +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h @@ -0,0 +1,139 @@ +#pragma once +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace DB +{ + +struct Settings; + +namespace AzureBlobStorage +{ + +using ServiceClient = Azure::Storage::Blobs::BlobServiceClient; +using ContainerClient = Azure::Storage::Blobs::BlobContainerClient; +using BlobClient = Azure::Storage::Blobs::BlobClient; +using BlobClientOptions = Azure::Storage::Blobs::BlobClientOptions; + +struct RequestSettings +{ + RequestSettings() = default; + + size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset + size_t min_bytes_for_seek = 1024 * 1024; + size_t max_single_read_retries = 3; + size_t max_single_download_retries = 3; + size_t list_object_keys_size = 1000; + size_t min_upload_part_size = 16 * 1024 * 1024; + size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; + size_t max_single_part_copy_size = 256 * 1024 * 1024; + size_t max_unexpected_write_error_retries = 4; + size_t max_inflight_parts_for_one_file = 20; + size_t max_blocks_in_multipart_upload = 50000; + size_t strict_upload_part_size = 0; + size_t upload_part_size_multiply_factor = 2; + size_t upload_part_size_multiply_parts_count_threshold = 500; + size_t sdk_max_retries = 10; + size_t sdk_retry_initial_backoff_ms = 10; + size_t sdk_retry_max_backoff_ms = 1000; + bool use_native_copy = false; + + using CurlOptions = Azure::Core::Http::CurlTransportOptions; + CurlOptions::CurlOptIPResolve curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_WHATEVER; +}; + +struct Endpoint +{ + String storage_account_url; + String account_name; + String container_name; + String prefix; + String sas_auth; + std::optional container_already_exists; + + String getEndpoint() const + { + String url = storage_account_url; + if (url.ends_with('/')) + url.pop_back(); + + if (!account_name.empty()) + url += "/" + account_name; + + if (!container_name.empty()) + url += "/" + container_name; + + if (!prefix.empty()) + url += "/" + prefix; + + if (!sas_auth.empty()) + url += "?" + sas_auth; + + return url; + } + + String getEndpointWithoutContainer() const + { + String url = storage_account_url; + + if (!account_name.empty()) + url += "/" + account_name; + + if (!sas_auth.empty()) + url += "?" + sas_auth; + + return url; + } +}; + +using ConnectionString = StrongTypedef; + +using AuthMethod = std::variant< + ConnectionString, + std::shared_ptr, + std::shared_ptr, + std::shared_ptr>; + +struct ConnectionParams +{ + Endpoint endpoint; + AuthMethod auth_method; + BlobClientOptions client_options; + + String getContainer() const { return endpoint.container_name; } + String getConnectionURL() const; + + std::unique_ptr createForService() const; + std::unique_ptr createForContainer() const; +}; + +Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); +void processURL(const String & url, const String & container_name, Endpoint & endpoint, AuthMethod & auth_method); + +std::unique_ptr getContainerClient(const ConnectionParams & params, bool readonly); + +BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk); +AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); + +std::unique_ptr getRequestSettings(const Settings & query_settings); +std::unique_ptr getRequestSettingsForBackup(const Settings & query_settings, bool use_native_copy); +std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + +} + +} + +#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 0ebe885a3e7..bc16955143b 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include @@ -104,7 +104,7 @@ private: AzureObjectStorage::AzureObjectStorage( const String & name_, - AzureClientPtr && client_, + ClientPtr && client_, SettingsPtr && settings_, const String & object_namespace_, const String & description_) @@ -397,24 +397,49 @@ void AzureObjectStorage::copyObject( /// NOLINT } void AzureObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - ContextPtr context, const ApplyNewSettingsOptions &) + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context, + const ApplyNewSettingsOptions & options) { - auto new_settings = getAzureBlobStorageSettings(config, config_prefix, context); + auto new_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); settings.set(std::move(new_settings)); - /// We don't update client + + if (!options.allow_client_change) + return; + + bool is_client_for_disk = client.get()->GetClickhouseOptions().IsClientForDisk; + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*settings.get(), is_client_for_disk), + }; + + auto new_client = AzureBlobStorage::getContainerClient(params, /*readonly=*/ true); + client.set(std::move(new_client)); } -std::unique_ptr AzureObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +std::unique_ptr AzureObjectStorage::cloneObjectStorage( + const std::string & new_namespace, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) { - return std::make_unique( - name, - getAzureBlobContainerClient(config, config_prefix), - getAzureBlobStorageSettings(config, config_prefix, context), - object_namespace, - description - ); + auto new_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + bool is_client_for_disk = client.get()->GetClickhouseOptions().IsClientForDisk; + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*new_settings, is_client_for_disk), + }; + + auto new_client = AzureBlobStorage::getContainerClient(params, /*readonly=*/ true); + return std::make_unique(name, std::move(new_client), std::move(new_settings), new_namespace, params.endpoint.getEndpointWithoutContainer()); } } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index c342929d656..2c7ce5e18dc 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include namespace Poco { @@ -16,71 +18,15 @@ class Logger; namespace DB { -struct AzureObjectStorageSettings -{ - AzureObjectStorageSettings( - uint64_t max_single_part_upload_size_, - uint64_t min_bytes_for_seek_, - int max_single_read_retries_, - int max_single_download_retries_, - int list_object_keys_size_, - size_t min_upload_part_size_, - size_t max_upload_part_size_, - size_t max_single_part_copy_size_, - bool use_native_copy_, - size_t max_unexpected_write_error_retries_, - size_t max_inflight_parts_for_one_file_, - size_t strict_upload_part_size_, - size_t upload_part_size_multiply_factor_, - size_t upload_part_size_multiply_parts_count_threshold_) - : max_single_part_upload_size(max_single_part_upload_size_) - , min_bytes_for_seek(min_bytes_for_seek_) - , max_single_read_retries(max_single_read_retries_) - , max_single_download_retries(max_single_download_retries_) - , list_object_keys_size(list_object_keys_size_) - , min_upload_part_size(min_upload_part_size_) - , max_upload_part_size(max_upload_part_size_) - , max_single_part_copy_size(max_single_part_copy_size_) - , use_native_copy(use_native_copy_) - , max_unexpected_write_error_retries(max_unexpected_write_error_retries_) - , max_inflight_parts_for_one_file(max_inflight_parts_for_one_file_) - , strict_upload_part_size(strict_upload_part_size_) - , upload_part_size_multiply_factor(upload_part_size_multiply_factor_) - , upload_part_size_multiply_parts_count_threshold(upload_part_size_multiply_parts_count_threshold_) - { - } - - AzureObjectStorageSettings() = default; - - size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset - uint64_t min_bytes_for_seek = 1024 * 1024; - size_t max_single_read_retries = 3; - size_t max_single_download_retries = 3; - int list_object_keys_size = 1000; - size_t min_upload_part_size = 16 * 1024 * 1024; - size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; - size_t max_single_part_copy_size = 256 * 1024 * 1024; - bool use_native_copy = false; - size_t max_unexpected_write_error_retries = 4; - size_t max_inflight_parts_for_one_file = 20; - size_t max_blocks_in_multipart_upload = 50000; - size_t strict_upload_part_size = 0; - size_t upload_part_size_multiply_factor = 2; - size_t upload_part_size_multiply_parts_count_threshold = 500; -}; - -using AzureClient = Azure::Storage::Blobs::BlobContainerClient; -using AzureClientPtr = std::unique_ptr; - class AzureObjectStorage : public IObjectStorage { public: - - using SettingsPtr = std::unique_ptr; + using ClientPtr = std::unique_ptr; + using SettingsPtr = std::unique_ptr; AzureObjectStorage( const String & name_, - AzureClientPtr && client_, + ClientPtr && client_, SettingsPtr && settings_, const String & object_namespace_, const String & description_); @@ -159,12 +105,8 @@ public: bool isRemote() const override { return true; } - std::shared_ptr getSettings() { return settings.get(); } - - std::shared_ptr getAzureBlobStorageClient() override - { - return client.get(); - } + std::shared_ptr getSettings() const { return settings.get(); } + std::shared_ptr getAzureBlobStorageClient() const override { return client.get(); } bool supportParallelWrite() const override { return true; } @@ -174,8 +116,8 @@ private: const String name; /// client used to access the files in the Blob Storage cloud - MultiVersion client; - MultiVersion settings; + MultiVersion client; + MultiVersion settings; const String object_namespace; /// container + prefix /// We use source url without container and prefix as description, because in Azure there are no limitations for operations between different containers. diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 727dbeed853..93ef2659cbb 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -127,7 +127,7 @@ public: const FileCacheSettings & getCacheSettings() const { return cache_settings; } #if USE_AZURE_BLOB_STORAGE - std::shared_ptr getAzureBlobStorageClient() override + std::shared_ptr getAzureBlobStorageClient() const override { return object_storage->getAzureBlobStorageClient(); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 59cc82d8c81..5c45a258806 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -195,7 +195,6 @@ public: /// DiskObjectStorage(CachedObjectStorage(CachedObjectStorage(S3ObjectStorage))) String getStructure() const { return fmt::format("DiskObjectStorage-{}({})", getName(), object_storage->getName()); } -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// Add a cache layer. /// Example: DiskObjectStorage(S3ObjectStorage) -> DiskObjectStorage(CachedObjectStorage(S3ObjectStorage)) /// There can be any number of cache layers: @@ -204,7 +203,6 @@ public: /// Get names of all cache layers. Name is how cache is defined in configuration file. NameSet getCacheLayersNames() const override; -#endif bool supportsStat() const override { return metadata_storage->supportsStat(); } struct stat stat(const String & path) const override; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index 44854633d65..56d5d11ef8a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -222,11 +222,7 @@ ObjectKeyWithMetadata DiskObjectStorageMetadata::popLastObject() bool DiskObjectStorageMetadata::getWriteFullObjectKeySetting() { -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD return Context::getGlobalContextInstance()->getServerSettings().storage_metadata_write_full_object_key; -#else - return false; -#endif } } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 9f5c14fdb7c..ceea4d5a2bb 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -75,6 +75,7 @@ struct RelativePathWithMetadata virtual std::string getPath() const { return relative_path; } virtual bool isArchive() const { return false; } virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } + virtual size_t fileSizeInArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } }; struct ObjectKeyWithMetadata @@ -260,7 +261,7 @@ public: virtual void setKeysGenerator(ObjectStorageKeysGeneratorPtr) { } #if USE_AZURE_BLOB_STORAGE - virtual std::shared_ptr getAzureBlobStorageClient() + virtual std::shared_ptr getAzureBlobStorageClient() const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for AzureBlobStorage"); } diff --git a/src/Disks/ObjectStorages/MetadataOperationsHolder.h b/src/Disks/ObjectStorages/MetadataOperationsHolder.h index 8997f40b9a2..a042f4bd8b9 100644 --- a/src/Disks/ObjectStorages/MetadataOperationsHolder.h +++ b/src/Disks/ObjectStorages/MetadataOperationsHolder.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp index ab7c2069b43..a690ecd2757 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp @@ -2,9 +2,7 @@ #include #include #include -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include -#endif #include #include @@ -135,7 +133,6 @@ void registerPlainRewritableMetadataStorage(MetadataStorageFactory & factory) }); } -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerMetadataStorageFromStaticFilesWebServer(MetadataStorageFactory & factory) { factory.registerMetadataStorageType("web", []( @@ -147,7 +144,6 @@ void registerMetadataStorageFromStaticFilesWebServer(MetadataStorageFactory & fa return std::make_shared(assert_cast(*object_storage)); }); } -#endif void registerMetadataStorages() { @@ -155,9 +151,7 @@ void registerMetadataStorages() registerMetadataStorageFromDisk(factory); registerPlainMetadataStorage(factory); registerPlainRewritableMetadataStorage(factory); -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerMetadataStorageFromStaticFilesWebServer(factory); -#endif } } diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 1bf8250adff..d42da6dc9c4 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -7,25 +7,24 @@ #include #include #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS #include #include #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE #include -#include +#include #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include #include #include -#endif #include #include #include #include #include #include +#include #include @@ -284,7 +283,7 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory) #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS void registerHDFSObjectStorage(ObjectStorageFactory & factory) { factory.registerObjectStorageType( @@ -309,7 +308,7 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) } #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE void registerAzureObjectStorage(ObjectStorageFactory & factory) { auto creator = []( @@ -319,21 +318,27 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) const ContextPtr & context, bool /* skip_access_check */) -> ObjectStoragePtr { - AzureBlobStorageEndpoint endpoint = processAzureBlobStorageEndpoint(config, config_prefix); + auto azure_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*azure_settings, /*for_disk=*/ true), + }; return createObjectStorage( ObjectStorageType::Azure, config, config_prefix, name, - getAzureBlobContainerClient(config, config_prefix), - getAzureBlobStorageSettings(config, config_prefix, context), - endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix, - endpoint.getEndpointWithoutContainer()); + AzureBlobStorage::getContainerClient(params, /*readonly=*/ false), std::move(azure_settings), + params.endpoint.prefix.empty() ? params.endpoint.container_name : params.endpoint.container_name + "/" + params.endpoint.prefix, + params.endpoint.getEndpointWithoutContainer()); }; + factory.registerObjectStorageType("azure_blob_storage", creator); factory.registerObjectStorageType("azure", creator); } #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerWebObjectStorage(ObjectStorageFactory & factory) { factory.registerObjectStorageType("web", []( @@ -381,7 +386,6 @@ void registerLocalObjectStorage(ObjectStorageFactory & factory) factory.registerObjectStorageType("local_blob_storage", creator); factory.registerObjectStorageType("local", creator); } -#endif void registerObjectStorages() { @@ -393,18 +397,16 @@ void registerObjectStorages() registerS3PlainRewritableObjectStorage(factory); #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS registerHDFSObjectStorage(factory); #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE registerAzureObjectStorage(factory); #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerWebObjectStorage(factory); registerLocalObjectStorage(factory); -#endif } } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index a6781e0ab35..a6672e14e10 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 62df98f51e6..de78ce4594d 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -1,14 +1,14 @@ #include -#include -#include #if USE_AWS_S3 +#include #include #include #include #include #include +#include #include #include #include @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index e837e056acc..7f7a3fe1a62 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include #include #include diff --git a/src/Disks/ObjectStorages/createMetadataStorageMetrics.h b/src/Disks/ObjectStorages/createMetadataStorageMetrics.h index 6dddc227ade..5cf1fbef2ab 100644 --- a/src/Disks/ObjectStorages/createMetadataStorageMetrics.h +++ b/src/Disks/ObjectStorages/createMetadataStorageMetrics.h @@ -1,14 +1,14 @@ #pragma once +#include "config.h" + #if USE_AWS_S3 # include #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE # include #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD -# include -#endif +#include #include namespace ProfileEvents @@ -42,7 +42,7 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create inline MetadataStorageMetrics MetadataStorageMetrics::create() { @@ -53,7 +53,6 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create inline MetadataStorageMetrics MetadataStorageMetrics::create() { @@ -62,6 +61,5 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create #include #include +#include #include @@ -175,7 +176,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.parquet.write_page_index = settings.output_format_parquet_write_page_index; format_settings.parquet.local_read_min_bytes_for_seek = settings.input_format_parquet_local_file_min_bytes_for_seek; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; - format_settings.pretty.color = settings.output_format_pretty_color; + format_settings.pretty.color = settings.output_format_pretty_color.valueOr(2); format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width; format_settings.pretty.max_rows = settings.output_format_pretty_max_rows; format_settings.pretty.max_value_width = settings.output_format_pretty_max_value_width; @@ -243,6 +244,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.orc.output_row_index_stride = settings.output_format_orc_row_index_stride; format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder; format_settings.orc.filter_push_down = settings.input_format_orc_filter_push_down; + format_settings.orc.read_use_writer_time_zone = settings.input_format_orc_read_use_writer_time_zone; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.capn_proto.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference; @@ -269,9 +271,13 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.markdown.escape_special_characters = settings.output_format_markdown_escape_special_characters; format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string; format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_bson_skip_fields_with_unsupported_types_in_schema_inference; - format_settings.max_binary_string_size = settings.format_binary_max_string_size; - format_settings.max_binary_array_size = settings.format_binary_max_array_size; + format_settings.binary.max_binary_string_size = settings.format_binary_max_string_size; + format_settings.binary.max_binary_array_size = settings.format_binary_max_array_size; + format_settings.binary.encode_types_in_binary_format = settings.output_format_binary_encode_types_in_binary_format; + format_settings.binary.decode_types_in_binary_format = settings.input_format_binary_decode_types_in_binary_format; format_settings.native.allow_types_conversion = settings.input_format_native_allow_types_conversion; + format_settings.native.encode_types_in_binary_format = settings.output_format_native_encode_types_in_binary_format; + format_settings.native.decode_types_in_binary_format = settings.input_format_native_decode_types_in_binary_format; format_settings.max_parser_depth = context->getSettingsRef().max_parser_depth; format_settings.client_protocol_version = context->getClientProtocolVersion(); format_settings.date_time_overflow_behavior = settings.date_time_overflow_behavior; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 8ac783a1d86..a736bb1a558 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -1,10 +1,9 @@ #pragma once -#include #include +#include #include #include -#include namespace DB { @@ -106,8 +105,6 @@ struct FormatSettings UInt64 input_allow_errors_num = 0; Float32 input_allow_errors_ratio = 0; - UInt64 max_binary_string_size = 1_GiB; - UInt64 max_binary_array_size = 1_GiB; UInt64 client_protocol_version = 0; UInt64 max_parser_depth = DBMS_DEFAULT_MAX_PARSER_DEPTH; @@ -121,6 +118,14 @@ struct FormatSettings ZSTD }; + struct + { + UInt64 max_binary_string_size = 1_GiB; + UInt64 max_binary_array_size = 1_GiB; + bool encode_types_in_binary_format = false; + bool decode_types_in_binary_format = false; + } binary{}; + struct { UInt64 row_group_size = 1000000; @@ -287,7 +292,8 @@ struct FormatSettings UInt64 max_value_width = 10000; UInt64 max_value_width_apply_for_single_value = false; bool highlight_digit_groups = true; - SettingFieldUInt64Auto color{"auto"}; + /// Set to 2 for auto + UInt64 color = 2; bool output_format_pretty_row_numbers = false; UInt64 output_format_pretty_single_large_number_tip_threshold = 1'000'000; @@ -403,6 +409,7 @@ struct FormatSettings bool use_fast_decoder = true; bool filter_push_down = true; UInt64 output_row_index_stride = 10'000; + bool read_use_writer_time_zone = false; } orc{}; /// For capnProto format we should determine how to @@ -458,6 +465,8 @@ struct FormatSettings struct { bool allow_types_conversion = true; + bool encode_types_in_binary_format = false; + bool decode_types_in_binary_format = false; } native{}; struct diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp new file mode 100644 index 00000000000..242d2dc9f80 --- /dev/null +++ b/src/Formats/JSONExtractTree.cpp @@ -0,0 +1,1660 @@ +#include +#include + +#include +#if USE_SIMDJSON +#include +#endif +#if USE_RAPIDJSON +#include +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +template +void jsonElementToString(const typename JSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings) +{ + if (element.isInt64()) + { + writeIntText(element.getInt64(), buf); + return; + } + if (element.isUInt64()) + { + writeIntText(element.getUInt64(), buf); + return; + } + if (element.isDouble()) + { + writeFloatText(element.getDouble(), buf); + return; + } + if (element.isBool()) + { + if (element.getBool()) + writeCString("true", buf); + else + writeCString("false", buf); + return; + } + if (element.isString()) + { + writeJSONString(element.getString(), buf, format_settings); + return; + } + if (element.isArray()) + { + writeChar('[', buf); + bool need_comma = false; + for (auto value : element.getArray()) + { + if (std::exchange(need_comma, true)) + writeChar(',', buf); + jsonElementToString(value, buf, format_settings); + } + writeChar(']', buf); + return; + } + if (element.isObject()) + { + writeChar('{', buf); + bool need_comma = false; + for (auto [key, value] : element.getObject()) + { + if (std::exchange(need_comma, true)) + writeChar(',', buf); + writeJSONString(key, buf, format_settings); + writeChar(':', buf); + jsonElementToString(value, buf, format_settings); + } + writeChar('}', buf); + return; + } + if (element.isNull()) + { + writeCString("null", buf); + return; + } +} + +template +bool tryGetNumericValueFromJSONElement( + NumberType & value, const typename JSONParser::Element & element, bool convert_bool_to_integer, String & error) +{ + switch (element.type()) + { + case ElementType::DOUBLE: + if constexpr (std::is_floating_point_v) + { + /// We permit inaccurate conversion of double to float. + /// Example: double 0.1 from JSON is not representable in float. + /// But it will be more convenient for user to perform conversion. + value = static_cast(element.getDouble()); + } + else if (!accurate::convertNumeric(element.getDouble(), value)) + { + error = fmt::format("cannot convert double value {} to {}", element.getDouble(), TypeName); + return false; + } + break; + case ElementType::UINT64: + if (!accurate::convertNumeric(element.getUInt64(), value)) + { + error = fmt::format("cannot convert UInt64 value {} to {}", element.getUInt64(), TypeName); + return false; + } + break; + case ElementType::INT64: + if (!accurate::convertNumeric(element.getInt64(), value)) + { + error = fmt::format("cannot convert Int64 value {} to {}", element.getInt64(), TypeName); + return false; + } + break; + case ElementType::BOOL: + if constexpr (is_integer) + { + if (convert_bool_to_integer) + { + value = static_cast(element.getBool()); + break; + } + } + error = fmt::format("cannot convert bool value to {}", TypeName); + return false; + case ElementType::STRING: { + auto rb = ReadBufferFromMemory{element.getString()}; + if constexpr (std::is_floating_point_v) + { + if (!tryReadFloatText(value, rb) || !rb.eof()) + { + error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + return false; + } + } + else + { + if (tryReadIntText(value, rb) && rb.eof()) + break; + + /// Try to parse float and convert it to integer. + Float64 tmp_float; + rb.position() = rb.buffer().begin(); + if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) + { + error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + return false; + } + + if (!accurate::convertNumeric(tmp_float, value)) + { + error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + return false; + } + } + break; + } + default: + return false; + } + + return true; +} + +namespace +{ + +template +String jsonElementToString(const typename JSONParser::Element & element, const FormatSettings & format_settings) +{ + WriteBufferFromOwnString buf; + jsonElementToString(element, buf, format_settings); + return buf.str(); +} + +template +class NumericNode : public JSONExtractTreeNode +{ +public: + explicit NumericNode(bool is_bool_type_ = false) : is_bool_type(is_bool_type_) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = fmt::format("cannot parse {} value from null", TypeName); + return false; + } + + NumberType value; + if (!tryGetNumericValueFromJSONElement(value, element, insert_settings.convert_bool_to_integer || is_bool_type, error)) + { + if (error.empty()) + error = fmt::format("cannot read {} value from JSON element: {}", TypeName, jsonElementToString(element, format_settings)); + return false; + } + + if (is_bool_type) + value = static_cast(value); + + auto & col_vec = assert_cast &>(column); + col_vec.insertValue(value); + return true; + } + +protected: + bool is_bool_type; +}; + +template +class LowCardinalityNumericNode : public NumericNode +{ +public: + explicit LowCardinalityNumericNode(bool is_nullable_, bool is_bool_type_ = false) + : NumericNode(is_bool_type_), is_nullable(is_nullable_) + { + } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + if (is_nullable || format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = fmt::format("cannot parse {} value from null", TypeName); + return false; + } + + NumberType value; + if (!tryGetNumericValueFromJSONElement(value, element, insert_settings.convert_bool_to_integer || this->is_bool_type, error)) + { + if (error.empty()) + error = fmt::format("cannot read {} value from JSON element: {}", TypeName, jsonElementToString(element, format_settings)); + return false; + } + + if (this->is_bool_type) + value = static_cast(value); + + auto & col_lc = assert_cast(column); + col_lc.insertData(reinterpret_cast(&value), sizeof(value)); + return true; + } + +private: + bool is_nullable; +}; + +template +class StringNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + error = "cannot parse String value from null"; + return false; + } + + if (!element.isString()) + { + auto & col_str = assert_cast(column); + auto & chars = col_str.getChars(); + WriteBufferFromVector buf(chars, AppendModeTag()); + jsonElementToString(element, buf, format_settings); + buf.finalize(); + chars.push_back(0); + col_str.getOffsets().push_back(chars.size()); + } + else + { + auto value = element.getString(); + auto & col_str = assert_cast(column); + col_str.insertData(value.data(), value.size()); + } + return true; + } +}; + +template +class LowCardinalityStringNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityStringNode(bool is_nullable_) : is_nullable(is_nullable_) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + if (is_nullable || format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = "cannot parse String value from null"; + return false; + } + + if (!element.isString()) + { + auto value = jsonElementToString(element, format_settings); + assert_cast(column).insertData(value.data(), value.size()); + } + else + { + auto value = element.getString(); + assert_cast(column).insertData(value.data(), value.size()); + } + + return true; + } + +private: + bool is_nullable; +}; + +template +class FixedStringNode : public JSONExtractTreeNode +{ +public: + explicit FixedStringNode(size_t fixed_length_) : fixed_length(fixed_length_) { } + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = "cannot parse FixedString value from null"; + return false; + } + + if (!element.isString()) + return checkValueSizeAndInsert(column, jsonElementToString(element, format_settings), error); + return checkValueSizeAndInsert(column, element.getString(), error); + } + +private: + template + bool checkValueSizeAndInsert(IColumn & column, const T & value, String & error) const + { + if (value.size() > fixed_length) + { + error = fmt::format("too large string for FixedString({}): {}", fixed_length, value); + return false; + } + assert_cast(column).insertData(value.data(), value.size()); + return true; + } + + size_t fixed_length; +}; + +template +class LowCardinalityFixedStringNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityFixedStringNode(bool is_nullable_, size_t fixed_length_) : is_nullable(is_nullable_), fixed_length(fixed_length_) + { + } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + if (is_nullable || format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + error = "cannot parse FixedString value from null"; + return false; + } + + if (!element.isString()) + return checkValueSizeAndInsert(column, jsonElementToString(element, format_settings), error); + return checkValueSizeAndInsert(column, element.getString(), error); + } + +private: + template + bool checkValueSizeAndInsert(IColumn & column, const T & value, String & error) const + { + if (value.size() > fixed_length) + { + error = fmt::format("too large string for FixedString({}): {}", fixed_length, value); + return false; + } + + // For the non low cardinality case of FixedString, the padding is done in the FixedString Column implementation. + // In order to avoid having to pass the data to a FixedString Column and read it back (which would slow down the execution) + // the data is padded here and written directly to the Low Cardinality Column + if (value.size() == fixed_length) + { + assert_cast(column).insertData(value.data(), value.size()); + } + else + { + String padded_value(value); + padded_value.resize(fixed_length, '\0'); + assert_cast(column).insertData(padded_value.data(), padded_value.size()); + } + return true; + } + + bool is_nullable; + size_t fixed_length; +}; + +template +class UUIDNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read UUID value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + UUID uuid; + if (!tryParse(uuid, data)) + { + error = fmt::format("cannot parse UUID value here: {}", data); + return false; + } + + assert_cast(column).insert(uuid); + return true; + } + + + static bool tryParse(UUID & uuid, std::string_view data) + { + ReadBufferFromMemory buf(data.data(), data.size()); + return tryReadUUIDText(uuid, buf) && buf.eof(); + } +}; + +template +class LowCardinalityUUIDNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityUUIDNode(bool is_nullable_) : is_nullable(is_nullable_) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && (is_nullable || format_settings.null_as_default)) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read UUID value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + ReadBufferFromMemory buf(data.data(), data.size()); + UUID uuid; + if (!tryReadUUIDText(uuid, buf) || !buf.eof()) + { + error = fmt::format("cannot parse UUID value here: {}", data); + return false; + } + assert_cast(column).insertData(reinterpret_cast(&uuid), sizeof(uuid)); + return true; + } + +private: + bool is_nullable; +}; + +template +class DateNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read Date value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + ReadBufferFromMemory buf(data.data(), data.size()); + DateType date; + if (!tryReadDateText(date, buf) || !buf.eof()) + { + error = fmt::format("cannot parse Date value here: {}", data); + return false; + } + + assert_cast &>(column).insertValue(date); + return true; + } +}; + +template +class DateTimeNode : public JSONExtractTreeNode, public TimezoneMixin +{ +public: + explicit DateTimeNode(const DataTypeDateTime & datetime_type) : TimezoneMixin(datetime_type) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + time_t value; + if (element.isString()) + { + if (!tryParse(value, element.getString(), format_settings.date_time_input_format)) + { + error = fmt::format("cannot parse DateTime value here: {}", element.getString()); + return false; + } + } + else if (element.isUInt64()) + { + value = element.getUInt64(); + } + else + { + error = fmt::format("cannot read DateTime value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + assert_cast(column).insert(value); + return true; + } + + bool tryParse(time_t & value, std::string_view data, FormatSettings::DateTimeInputFormat date_time_input_format) const + { + ReadBufferFromMemory buf(data.data(), data.size()); + switch (date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + if (tryReadDateTimeText(value, buf, time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffort: + if (tryParseDateTimeBestEffort(value, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + if (tryParseDateTimeBestEffortUS(value, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + } + + return false; + } +}; + +template +class DecimalNode : public JSONExtractTreeNode +{ +public: + explicit DecimalNode(const DataTypePtr & type) : scale(assert_cast &>(*type).getScale()) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + DecimalType value{}; + + switch (element.type()) + { + case ElementType::DOUBLE: + value = convertToDecimal, DataTypeDecimal>(element.getDouble(), scale); + break; + case ElementType::UINT64: + value = convertToDecimal, DataTypeDecimal>(element.getUInt64(), scale); + break; + case ElementType::INT64: + value = convertToDecimal, DataTypeDecimal>(element.getInt64(), scale); + break; + case ElementType::STRING: { + auto rb = ReadBufferFromMemory{element.getString()}; + if (!SerializationDecimal::tryReadText(value, rb, DecimalUtils::max_precision, scale)) + { + error = fmt::format("cannot parse Decimal value here: {}", element.getString()); + return false; + } + break; + } + case ElementType::NULL_VALUE: { + if (!format_settings.null_as_default) + { + error = "cannot convert null to Decimal value"; + return false; + } + break; + } + default: + { + error = fmt::format("cannot read Decimal value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + } + + assert_cast &>(column).insertValue(value); + return true; + } + +private: + UInt32 scale; +}; + + +template +class DateTime64Node : public JSONExtractTreeNode, public TimezoneMixin +{ +public: + explicit DateTime64Node(const DataTypeDateTime64 & datetime64_type) : TimezoneMixin(datetime64_type), scale(datetime64_type.getScale()) + { + } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + DateTime64 value; + if (element.isString()) + { + if (!tryParse(value, element.getString(), format_settings.date_time_input_format)) + { + error = fmt::format("cannot parse DateTime64 value here: {}", element.getString()); + return false; + } + } + else + { + switch (element.type()) + { + case ElementType::DOUBLE: + value = convertToDecimal, DataTypeDecimal>(element.getDouble(), scale); + break; + case ElementType::UINT64: + value = convertToDecimal, DataTypeDecimal>(element.getUInt64(), scale); + break; + case ElementType::INT64: + value = convertToDecimal, DataTypeDecimal>(element.getInt64(), scale); + break; + default: + error = fmt::format("cannot read DateTime64 value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + } + + assert_cast(column).insert(value); + return true; + } + + bool tryParse(DateTime64 & value, std::string_view data, FormatSettings::DateTimeInputFormat date_time_input_format) const + { + ReadBufferFromMemory buf(data.data(), data.size()); + switch (date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + if (tryReadDateTime64Text(value, scale, buf, time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffort: + if (tryParseDateTime64BestEffort(value, scale, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + if (tryParseDateTime64BestEffortUS(value, scale, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + } + + return false; + } + +private: + UInt32 scale; +}; + +template +class EnumNode : public JSONExtractTreeNode +{ +public: + explicit EnumNode(const std::vector> & name_value_pairs_) : name_value_pairs(name_value_pairs_) + { + for (const auto & name_value_pair : name_value_pairs) + { + name_to_value_map.emplace(name_value_pair.first, name_value_pair.second); + only_values.emplace(name_value_pair.second); + } + } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = "cannot convert null to Enum value"; + return false; + } + + auto & col_vec = assert_cast &>(column); + + if (element.isInt64()) + { + Type value; + if (!accurate::convertNumeric(element.getInt64(), value) || !only_values.contains(value)) + { + error = fmt::format("cannot convert value {} to enum: there is no such value in enum", element.getInt64()); + return false; + } + col_vec.insertValue(value); + return true; + } + + if (element.isUInt64()) + { + Type value; + if (!accurate::convertNumeric(element.getUInt64(), value) || !only_values.contains(value)) + { + error = fmt::format("cannot convert value {} to enum: there is no such value in enum", element.getUInt64()); + return false; + } + col_vec.insertValue(value); + return true; + } + + if (element.isString()) + { + auto value = name_to_value_map.find(element.getString()); + if (value == name_to_value_map.end()) + { + error = fmt::format("cannot convert value {} to enum: there is no such value in enum", element.getString()); + return false; + } + col_vec.insertValue(value->second); + return true; + } + + error = fmt::format("cannot read Enum value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + +private: + std::vector> name_value_pairs; + std::unordered_map name_to_value_map; + std::unordered_set only_values; +}; + +template +class IPv4Node : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read IPv4 value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + IPv4 value; + if (!tryParse(value, data)) + { + error = fmt::format("cannot parse IPv4 value here: {}", data); + return false; + } + + assert_cast(column).insert(value); + return true; + } + + static bool tryParse(IPv4 & value, std::string_view data) + { + ReadBufferFromMemory buf(data.data(), data.size()); + return tryReadIPv4Text(value, buf) && buf.eof(); + } +}; + +template +class IPv6Node : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read IPv6 value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + IPv6 value; + if (!tryParse(value, data)) + { + error = fmt::format("cannot parse IPv6 value here: {}", data); + return false; + } + + assert_cast(column).insert(value); + return true; + } + + + static bool tryParse(IPv6 & value, std::string_view data) + { + ReadBufferFromMemory buf(data.data(), data.size()); + return tryReadIPv6Text(value, buf) && buf.eof(); + } +}; + +template +class NullableNode : public JSONExtractTreeNode +{ +public: + explicit NullableNode(std::unique_ptr> nested_) : nested(std::move(nested_)) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull()) + { + column.insertDefault(); + return true; + } + + auto & col_null = assert_cast(column); + if (!nested->insertResultToColumn(col_null.getNestedColumn(), element, insert_settings, format_settings, error)) + return false; + col_null.getNullMapColumn().insertValue(0); + return true; + } + +private: + std::unique_ptr> nested; +}; + +template +class LowCardinalityNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityNode(bool is_nullable_, std::unique_ptr> nested_) + : is_nullable(is_nullable_), nested(std::move(nested_)) + { + } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && (is_nullable || format_settings.null_as_default)) + { + column.insertDefault(); + return true; + } + + auto & col_lc = assert_cast(column); + auto tmp_nested = col_lc.getDictionary().getNestedColumn()->cloneEmpty(); + if (!nested->insertResultToColumn(*tmp_nested, element, insert_settings, format_settings, error)) + return false; + + col_lc.insertFromFullColumn(*tmp_nested, 0); + return true; + } + +private: + bool is_nullable; + std::unique_ptr> nested; +}; + +template +class ArrayNode : public JSONExtractTreeNode +{ +public: + explicit ArrayNode(std::unique_ptr> nested_) : nested(std::move(nested_)) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isArray()) + { + error = fmt::format("cannot read Array value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto array = element.getArray(); + + auto & col_arr = assert_cast(column); + auto & data = col_arr.getData(); + size_t old_size = data.size(); + bool were_valid_elements = false; + + for (auto value : array) + { + if (nested->insertResultToColumn(data, value, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + data.insertDefault(); + } + else + { + data.popBack(data.size() - old_size); + return false; + } + } + + if (!were_valid_elements) + { + data.popBack(data.size() - old_size); + return false; + } + + col_arr.getOffsets().push_back(data.size()); + return true; + } + +private: + std::unique_ptr> nested; +}; + +template +class TupleNode : public JSONExtractTreeNode +{ +public: + TupleNode(std::vector>> nested_, const std::vector & explicit_names_) + : nested(std::move(nested_)), explicit_names(explicit_names_) + { + for (size_t i = 0; i != explicit_names.size(); ++i) + name_to_index_map.emplace(explicit_names[i], i); + } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + auto & tuple = assert_cast(column); + size_t old_size = column.size(); + bool were_valid_elements = false; + + auto set_size = [&](size_t size) + { + for (size_t i = 0; i != tuple.tupleSize(); ++i) + { + auto & col = tuple.getColumn(i); + if (col.size() != size) + { + if (col.size() > size) + col.popBack(col.size() - size); + else + while (col.size() < size) + col.insertDefault(); + } + } + }; + + if (element.isArray()) + { + auto array = element.getArray(); + auto it = array.begin(); + + for (size_t index = 0; (index != nested.size()) && (it != array.end()); ++index) + { + if (nested[index]->insertResultToColumn(tuple.getColumn(index), *it++, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + tuple.getColumn(index).insertDefault(); + } + else + { + set_size(old_size); + error += fmt::format("(during reading tuple {} element)", index); + return false; + } + } + + set_size(old_size + static_cast(were_valid_elements)); + return were_valid_elements; + } + + if (element.isObject()) + { + auto object = element.getObject(); + if (name_to_index_map.empty()) + { + auto it = object.begin(); + for (size_t index = 0; (index != nested.size()) && (it != object.end()); ++index) + { + if (nested[index]->insertResultToColumn(tuple.getColumn(index), (*it++).second, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + tuple.getColumn(index).insertDefault(); + } + else + { + set_size(old_size); + error += fmt::format("(during reading tuple {} element)", index); + return false; + } + } + } + else + { + for (const auto & [key, value] : object) + { + auto index = name_to_index_map.find(key); + if (index != name_to_index_map.end()) + { + if (nested[index->second]->insertResultToColumn(tuple.getColumn(index->second), value, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (!insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + set_size(old_size); + error += fmt::format("(during reading tuple element \"{}\")", key); + return false; + } + } + } + } + + set_size(old_size + static_cast(were_valid_elements)); + return were_valid_elements; + } + + error = fmt::format("cannot read Tuple value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + +private: + std::vector>> nested; + std::vector explicit_names; + std::unordered_map name_to_index_map; +}; + +template +class MapNode : public JSONExtractTreeNode +{ +public: + explicit MapNode(std::unique_ptr> value_) : value(std::move(value_)) { } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + if (!element.isObject()) + { + error = fmt::format("cannot read Map value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto & map_col = assert_cast(column); + auto & offsets = map_col.getNestedColumn().getOffsets(); + auto & tuple_col = map_col.getNestedData(); + auto & key_col = tuple_col.getColumn(0); + auto & value_col = tuple_col.getColumn(1); + size_t old_size = tuple_col.size(); + + auto object = element.getObject(); + auto it = object.begin(); + for (; it != object.end(); ++it) + { + auto pair = *it; + + /// Insert key + key_col.insertData(pair.first.data(), pair.first.size()); + + /// Insert value + if (!value->insertResultToColumn(value_col, pair.second, insert_settings, format_settings, error)) + { + if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + value_col.insertDefault(); + } + else + { + key_col.popBack(key_col.size() - offsets.back()); + value_col.popBack(value_col.size() - offsets.back()); + error += fmt::format("(during reading value of key \"{}\")", pair.first); + return false; + } + } + } + + offsets.push_back(old_size + object.size()); + return true; + } + +private: + std::unique_ptr> value; +}; + +template +class VariantNode : public JSONExtractTreeNode +{ +public: + VariantNode(std::vector>> variant_nodes_, std::vector order_) + : variant_nodes(std::move(variant_nodes_)), order(std::move(order_)) + { + } + + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + auto & column_variant = assert_cast(column); + for (size_t i : order) + { + auto & variant = column_variant.getVariantByGlobalDiscriminator(i); + if (variant_nodes[i]->insertResultToColumn(variant, element, insert_settings, format_settings, error)) + { + column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); + column_variant.getOffsets().push_back(variant.size() - 1); + return true; + } + } + + error = fmt::format("cannot read Map value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + +private: + std::vector>> variant_nodes; + /// Order in which we should try variants nodes. + /// For example, String should be always the last one. + std::vector order; +}; + + +template +class DynamicNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override + { + auto & column_dynamic = assert_cast(column); + /// First, check if element is NULL. + if (element.isNull()) + { + column_dynamic.insertDefault(); + return true; + } + + auto & variant_column = column_dynamic.getVariantColumn(); + auto variant_info = column_dynamic.getVariantInfo(); + /// Second, infer ClickHouse type for this element and add it as a new variant. + auto element_type = elementToDataType(element, format_settings); + if (column_dynamic.addNewVariant(element_type)) + { + auto node = buildJSONExtractTree(element_type, "Dynamic inference"); + auto global_discriminator = variant_info.variant_name_to_discriminator[element_type->getName()]; + auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discriminator); + if (!node->insertResultToColumn(variant, element, insert_settings, format_settings, error)) + return false; + variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(global_discriminator)); + variant_column.getOffsets().push_back(variant.size() - 1); + return true; + } + + /// We couldn't add new variant. Try to insert element into current variants. + auto variant_node = buildJSONExtractTree(variant_info.variant_type, "Dynamic inference"); + if (variant_node->insertResultToColumn(variant_column, element, insert_settings, format_settings, error)) + return true; + + /// We couldn't insert element into any existing variant, add String variant and read value as String. + column_dynamic.addStringVariant(); + auto string_global_discriminator = variant_info.variant_name_to_discriminator["String"]; + auto & string_column = variant_column.getVariantByGlobalDiscriminator(string_global_discriminator); + if (!getStringNode()->insertResultToColumn(string_column, element, insert_settings, format_settings, error)) + return false; + variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(string_global_discriminator)); + variant_column.getOffsets().push_back(string_column.size() - 1); + return true; + } + + static const std::unique_ptr> & getStringNode() + { + static const std::unique_ptr> string_node + = buildJSONExtractTree(std::make_shared(), "Dynamic inference"); + return string_node; + } + + static DataTypePtr elementToDataType(const typename JSONParser::Element & element, const FormatSettings & format_settings) + { + JSONInferenceInfo json_inference_info; + auto type = elementToDataTypeImpl(element, format_settings, json_inference_info); + transformFinalInferredJSONTypeIfNeeded(type, format_settings, &json_inference_info); + return type; + } + +private: + static DataTypePtr elementToDataTypeImpl(const typename JSONParser::Element & element, const FormatSettings & format_settings, JSONInferenceInfo & json_inference_info) + { + switch (element.type()) + { + case ElementType::NULL_VALUE: + return makeNullable(std::make_shared()); + case ElementType::BOOL: + return DataTypeFactory::instance().get("Bool"); + case ElementType::INT64: + { + auto type = std::make_shared(); + if (element.getInt64() < 0) + json_inference_info.negative_integers.insert(type.get()); + return type; + } + case ElementType::UINT64: + return std::make_shared(); + case ElementType::DOUBLE: + return std::make_shared(); + case ElementType::STRING: + { + auto data = element.getString(); + + if (auto type = tryInferDateOrDateTimeFromString(data, format_settings)) + return type; + + if (format_settings.json.try_infer_numbers_from_strings) + { + if (auto type = tryInferJSONNumberFromString(data, format_settings, &json_inference_info)) + { + json_inference_info.numbers_parsed_from_json_strings.insert(type.get()); + return type; + } + } + + return std::make_shared(); + } + case ElementType::ARRAY: + { + auto array = element.getArray(); + DataTypes types; + types.reserve(array.size()); + for (auto value : array) + types.push_back(makeNullableSafe(elementToDataTypeImpl(value, format_settings, json_inference_info))); + + if (types.empty()) + return std::make_shared(makeNullable(std::make_shared())); + + if (checkIfTypesAreEqual(types)) + return std::make_shared(types.back()); + + /// For JSON if we have not complete types, we should not try to transform them + /// and return it as a Tuple. + /// For example, if we have types [Nullable(Float64), Nullable(Nothing), Nullable(Float64)] + /// it can be Array(Nullable(Float64)) or Tuple(Nullable(Float64), , Nullable(Float64)) and + /// we can't determine which one it is right now. But we will be able to do it later + /// when we will have the final top level type. + /// For example, we can have JSON element [[42.42, null, 43.43], [44.44, "Some string", 45.45]] and we should + /// determine the type for this element as Tuple(Nullable(Float64), Nullable(String), Nullable(Float64)). + for (const auto & type : types) + { + if (!checkIfTypeIsComplete(type)) + return std::make_shared(types); + } + + auto types_copy = types; + transformInferredJSONTypesIfNeeded(types_copy, format_settings, &json_inference_info); + + if (checkIfTypesAreEqual(types_copy)) + return std::make_shared(types_copy.back()); + + return std::make_shared(types); + } + case ElementType::OBJECT: { + /// TODO: Use new JSON type here when it's ready. + return std::make_shared(std::make_shared(), makeNullable(std::make_shared())); + } + } + } +}; + +} + +template +std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message) +{ + switch (type->getTypeId()) + { + case TypeIndex::UInt8: + return std::make_unique>(isBool(type)); + case TypeIndex::UInt16: + return std::make_unique>(); + case TypeIndex::UInt32: + return std::make_unique>(); + case TypeIndex::UInt64: + return std::make_unique>(); + case TypeIndex::UInt128: + return std::make_unique>(); + case TypeIndex::UInt256: + return std::make_unique>(); + case TypeIndex::Int8: + return std::make_unique>(); + case TypeIndex::Int16: + return std::make_unique>(); + case TypeIndex::Int32: + return std::make_unique>(); + case TypeIndex::Int64: + return std::make_unique>(); + case TypeIndex::Int128: + return std::make_unique>(); + case TypeIndex::Int256: + return std::make_unique>(); + case TypeIndex::Float32: + return std::make_unique>(); + case TypeIndex::Float64: + return std::make_unique>(); + case TypeIndex::String: + return std::make_unique>(); + case TypeIndex::FixedString: + return std::make_unique>(assert_cast(*type).getN()); + case TypeIndex::UUID: + return std::make_unique>(); + case TypeIndex::IPv4: + return std::make_unique>(); + case TypeIndex::IPv6: + return std::make_unique>(); + case TypeIndex::Date:; + return std::make_unique>(); + case TypeIndex::Date32: + return std::make_unique>(); + case TypeIndex::DateTime: + return std::make_unique>(assert_cast(*type)); + case TypeIndex::DateTime64: + return std::make_unique>(assert_cast(*type)); + case TypeIndex::Decimal32: + return std::make_unique>(type); + case TypeIndex::Decimal64: + return std::make_unique>(type); + case TypeIndex::Decimal128: + return std::make_unique>(type); + case TypeIndex::Decimal256: + return std::make_unique>(type); + case TypeIndex::Enum8: + return std::make_unique>(assert_cast(*type).getValues()); + case TypeIndex::Enum16: + return std::make_unique>(assert_cast(*type).getValues()); + case TypeIndex::LowCardinality: + { + /// To optimize inserting into LowCardinality we have special nodes for LowCardinality of numeric and string types. + const auto & lc_type = assert_cast(*type); + auto dictionary_type = removeNullable(lc_type.getDictionaryType()); + bool is_nullable = lc_type.isLowCardinalityNullable(); + + switch (dictionary_type->getTypeId()) + { + case TypeIndex::UInt8: + return std::make_unique>(is_nullable, isBool(type)); + case TypeIndex::UInt16: + return std::make_unique>(is_nullable); + case TypeIndex::UInt32: + return std::make_unique>(is_nullable); + case TypeIndex::UInt64: + return std::make_unique>(is_nullable); + case TypeIndex::Int8: + return std::make_unique>(is_nullable); + case TypeIndex::Int16: + return std::make_unique>(is_nullable); + case TypeIndex::Int32: + return std::make_unique>(is_nullable); + case TypeIndex::Int64: + return std::make_unique>(is_nullable); + case TypeIndex::Float32: + return std::make_unique>(is_nullable); + case TypeIndex::Float64: + return std::make_unique>(is_nullable); + case TypeIndex::String: + return std::make_unique>(is_nullable); + case TypeIndex::FixedString: + return std::make_unique>(is_nullable, assert_cast(*dictionary_type).getN()); + case TypeIndex::UUID: + return std::make_unique>(is_nullable); + default: + return std::make_unique>(is_nullable, buildJSONExtractTree(dictionary_type, source_for_exception_message)); + } + } + case TypeIndex::Nullable: + return std::make_unique>(buildJSONExtractTree(assert_cast(*type).getNestedType(), source_for_exception_message)); + case TypeIndex::Array: + return std::make_unique>(buildJSONExtractTree(assert_cast(*type).getNestedType(), source_for_exception_message)); + case TypeIndex::Tuple: + { + const auto & tuple = assert_cast(*type); + const auto & tuple_elements = tuple.getElements(); + std::vector>> elements; + elements.reserve(tuple_elements.size()); + for (const auto & tuple_element : tuple_elements) + elements.emplace_back(buildJSONExtractTree(tuple_element, source_for_exception_message)); + return std::make_unique>(std::move(elements), tuple.haveExplicitNames() ? tuple.getElementNames() : Strings{}); + } + case TypeIndex::Map: + { + const auto & map_type = assert_cast(*type); + const auto & key_type = map_type.getKeyType(); + if (!isString(removeLowCardinality(key_type))) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "{} doesn't support the return type schema: {} with key type not String", + source_for_exception_message, + type->getName()); + + const auto & value_type = map_type.getValueType(); + return std::make_unique>(buildJSONExtractTree(value_type, source_for_exception_message)); + } + case TypeIndex::Variant: + { + const auto & variant_type = assert_cast(*type); + const auto & variants = variant_type.getVariants(); + std::vector>> variant_nodes; + variant_nodes.reserve(variants.size()); + for (const auto & variant : variants) + variant_nodes.push_back(buildJSONExtractTree(variant, source_for_exception_message)); + return std::make_unique>(std::move(variant_nodes), SerializationVariant::getVariantsDeserializeTextOrder(variants)); + } + case TypeIndex::Dynamic: + return std::make_unique>(); + default: + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "{} doesn't support the return type schema: {}", + source_for_exception_message, + type->getName()); + } +} + +#if USE_SIMDJSON +template void jsonElementToString(const SimdJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); +template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); +#endif + +#if USE_RAPIDJSON +template void jsonElementToString(const RapidJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); +template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); +template bool tryGetNumericValueFromJSONElement(Float64 & value, const RapidJSONParser::Element & element, bool convert_bool_to_integer, String & error); +#else +template void jsonElementToString(const DummyJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); +template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); +#endif + +} diff --git a/src/Formats/JSONExtractTree.h b/src/Formats/JSONExtractTree.h new file mode 100644 index 00000000000..b5e82506548 --- /dev/null +++ b/src/Formats/JSONExtractTree.h @@ -0,0 +1,41 @@ +#pragma once +#include +#include +#include + + +namespace DB +{ + +struct JSONExtractInsertSettings +{ + /// If false, JSON boolean values won't be inserted into columns with integer types + /// It's used in JSONExtractInt64/JSONExtractUInt64/... functions. + bool convert_bool_to_integer = true; + /// If true, when complex type like Array/Map has both valid and invalid elements, + /// the default value will be inserted on invalid elements. + /// For example, if we have [1, "hello", 2] and type Array(UInt32), + /// we will insert [1, 0, 2] in the column. Used in all JSONExtract functions. + bool insert_default_on_invalid_elements_in_complex_types = false; +}; + +template +class JSONExtractTreeNode +{ +public: + JSONExtractTreeNode() = default; + virtual ~JSONExtractTreeNode() = default; + virtual bool insertResultToColumn(IColumn &, const typename JSONParser::Element &, const JSONExtractInsertSettings & insert_setting, const FormatSettings & format_settings, String & error) const = 0; +}; + +/// Build a tree for insertion JSON element into a column with provided data type. +template +std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); + +template +void jsonElementToString(const typename JSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); + +template +bool tryGetNumericValueFromJSONElement(NumberType & value, const typename JSONParser::Element & element, bool convert_bool_to_integer, String & error); + +} diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index fa5d41d6536..f9b56cf4f61 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -31,8 +32,8 @@ namespace ErrorCodes } -NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_) - : istr(istr_), server_revision(server_revision_) +NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_, std::optional format_settings_) + : istr(istr_), server_revision(server_revision_), format_settings(format_settings_) { } @@ -40,16 +41,12 @@ NativeReader::NativeReader( ReadBuffer & istr_, const Block & header_, UInt64 server_revision_, - bool skip_unknown_columns_, - bool null_as_default_, - bool allow_types_conversion_, + std::optional format_settings_, BlockMissingValues * block_missing_values_) : istr(istr_) , header(header_) , server_revision(server_revision_) - , skip_unknown_columns(skip_unknown_columns_) - , null_as_default(null_as_default_) - , allow_types_conversion(allow_types_conversion_) + , format_settings(std::move(format_settings_)) , block_missing_values(block_missing_values_) { } @@ -83,13 +80,14 @@ void NativeReader::resetParser() use_index = false; } -void NativeReader::readData(const ISerialization & serialization, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint) +static void readData(const ISerialization & serialization, ColumnPtr & column, ReadBuffer & istr, const std::optional & format_settings, size_t rows, double avg_value_size_hint) { ISerialization::DeserializeBinaryBulkSettings settings; settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; }; settings.avg_value_size_hint = avg_value_size_hint; settings.position_independent_encoding = false; settings.native_format = true; + settings.data_types_binary_encoding = format_settings && format_settings->native.decode_types_in_binary_format; ISerialization::DeserializeBinaryBulkStatePtr state; @@ -167,8 +165,16 @@ Block NativeReader::read() /// Type String type_name; - readBinary(type_name, istr); - column.type = data_type_factory.get(type_name); + if (format_settings && format_settings->native.decode_types_in_binary_format) + { + column.type = decodeDataType(istr); + type_name = column.type->getName(); + } + else + { + readBinary(type_name, istr); + column.type = data_type_factory.get(type_name); + } setVersionToAggregateFunctions(column.type, true, server_revision); @@ -203,7 +209,7 @@ Block NativeReader::read() double avg_value_size_hint = avg_value_size_hints.empty() ? 0 : avg_value_size_hints[i]; if (rows) /// If no rows, nothing to read. - readData(*serialization, read_column, istr, rows, avg_value_size_hint); + readData(*serialization, read_column, istr, format_settings, rows, avg_value_size_hint); column.column = std::move(read_column); @@ -214,12 +220,12 @@ Block NativeReader::read() { auto & header_column = header.getByName(column.name); - if (null_as_default) + if (format_settings && format_settings->null_as_default) insertNullAsDefaultIfNeeded(column, header_column, header.getPositionByName(column.name), block_missing_values); if (!header_column.type->equals(*column.type)) { - if (allow_types_conversion) + if (format_settings && format_settings->native.allow_types_conversion) { try { @@ -246,7 +252,7 @@ Block NativeReader::read() } else { - if (!skip_unknown_columns) + if (format_settings && !format_settings->skip_unknown_fields) throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown column with name {} found while reading data in Native format", column.name); use_in_result = false; } diff --git a/src/Formats/NativeReader.h b/src/Formats/NativeReader.h index 3cec4afd997..97b6ea22b15 100644 --- a/src/Formats/NativeReader.h +++ b/src/Formats/NativeReader.h @@ -20,7 +20,7 @@ class NativeReader { public: /// If a non-zero server_revision is specified, additional block information may be expected and read. - NativeReader(ReadBuffer & istr_, UInt64 server_revision_); + NativeReader(ReadBuffer & istr_, UInt64 server_revision_, std::optional format_settings_ = std::nullopt); /// For cases when data structure (header) is known in advance. /// NOTE We may use header for data validation and/or type conversions. It is not implemented. @@ -28,9 +28,7 @@ public: ReadBuffer & istr_, const Block & header_, UInt64 server_revision_, - bool skip_unknown_columns_ = false, - bool null_as_default_ = false, - bool allow_types_conversion_ = false, + std::optional format_settings_ = std::nullopt, BlockMissingValues * block_missing_values_ = nullptr); /// For cases when we have an index. It allows to skip columns. Only columns specified in the index will be read. @@ -38,8 +36,6 @@ public: IndexForNativeFormat::Blocks::const_iterator index_block_it_, IndexForNativeFormat::Blocks::const_iterator index_block_end_); - static void readData(const ISerialization & serialization, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint); - Block getHeader() const; void resetParser(); @@ -50,9 +46,7 @@ private: ReadBuffer & istr; Block header; UInt64 server_revision; - bool skip_unknown_columns = false; - bool null_as_default = false; - bool allow_types_conversion = false; + std::optional format_settings = std::nullopt; BlockMissingValues * block_missing_values = nullptr; bool use_index = false; diff --git a/src/Formats/NativeWriter.cpp b/src/Formats/NativeWriter.cpp index b150561a5fc..3c87e489b1c 100644 --- a/src/Formats/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB { @@ -25,10 +26,20 @@ namespace ErrorCodes NativeWriter::NativeWriter( - WriteBuffer & ostr_, UInt64 client_revision_, const Block & header_, bool remove_low_cardinality_, - IndexForNativeFormat * index_, size_t initial_size_of_file_) - : ostr(ostr_), client_revision(client_revision_), header(header_), - index(index_), initial_size_of_file(initial_size_of_file_), remove_low_cardinality(remove_low_cardinality_) + WriteBuffer & ostr_, + UInt64 client_revision_, + const Block & header_, + std::optional format_settings_, + bool remove_low_cardinality_, + IndexForNativeFormat * index_, + size_t initial_size_of_file_) + : ostr(ostr_) + , client_revision(client_revision_) + , header(header_) + , index(index_) + , initial_size_of_file(initial_size_of_file_) + , remove_low_cardinality(remove_low_cardinality_) + , format_settings(std::move(format_settings_)) { if (index) { @@ -45,7 +56,7 @@ void NativeWriter::flush() } -static void writeData(const ISerialization & serialization, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit) +static void writeData(const ISerialization & serialization, const ColumnPtr & column, WriteBuffer & ostr, const std::optional & format_settings, UInt64 offset, UInt64 limit) { /** If there are columns-constants - then we materialize them. * (Since the data type does not know how to serialize / deserialize constants.) @@ -57,6 +68,7 @@ static void writeData(const ISerialization & serialization, const ColumnPtr & co settings.getter = [&ostr](ISerialization::SubstreamPath) -> WriteBuffer * { return &ostr; }; settings.position_independent_encoding = false; settings.low_cardinality_max_dictionary_size = 0; + settings.data_types_binary_encoding = format_settings && format_settings->native.encode_types_in_binary_format; ISerialization::SerializeBinaryBulkStatePtr state; serialization.serializeBinaryBulkStatePrefix(*full_column, settings, state); @@ -121,15 +133,22 @@ size_t NativeWriter::write(const Block & block) setVersionToAggregateFunctions(column.type, include_version, include_version ? std::optional(client_revision) : std::nullopt); /// Type - String type_name = column.type->getName(); + if (format_settings && format_settings->native.encode_types_in_binary_format) + { + encodeDataType(column.type, ostr); + } + else + { + String type_name = column.type->getName(); - /// For compatibility, we will not send explicit timezone parameter in DateTime data type - /// to older clients, that cannot understand it. - if (client_revision < DBMS_MIN_REVISION_WITH_TIME_ZONE_PARAMETER_IN_DATETIME_DATA_TYPE - && startsWith(type_name, "DateTime(")) - type_name = "DateTime"; + /// For compatibility, we will not send explicit timezone parameter in DateTime data type + /// to older clients, that cannot understand it. + if (client_revision < DBMS_MIN_REVISION_WITH_TIME_ZONE_PARAMETER_IN_DATETIME_DATA_TYPE + && startsWith(type_name, "DateTime(")) + type_name = "DateTime"; - writeStringBinary(type_name, ostr); + writeStringBinary(type_name, ostr); + } /// Serialization. Dynamic, if client supports it. SerializationPtr serialization; @@ -161,7 +180,7 @@ size_t NativeWriter::write(const Block & block) /// Data if (rows) /// Zero items of data is always represented as zero number of bytes. - writeData(*serialization, column.column, ostr, 0, 0); + writeData(*serialization, column.column, ostr, format_settings, 0, 0); if (index) { diff --git a/src/Formats/NativeWriter.h b/src/Formats/NativeWriter.h index 7bb377d2e4a..b4903243d45 100644 --- a/src/Formats/NativeWriter.h +++ b/src/Formats/NativeWriter.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -23,7 +24,7 @@ public: /** If non-zero client_revision is specified, additional block information can be written. */ NativeWriter( - WriteBuffer & ostr_, UInt64 client_revision_, const Block & header_, bool remove_low_cardinality_ = false, + WriteBuffer & ostr_, UInt64 client_revision_, const Block & header_, std::optional format_settings_ = std::nullopt, bool remove_low_cardinality_ = false, IndexForNativeFormat * index_ = nullptr, size_t initial_size_of_file_ = 0); Block getHeader() const { return header; } @@ -44,6 +45,7 @@ private: CompressedWriteBuffer * ostr_concrete = nullptr; bool remove_low_cardinality; + std::optional format_settings; }; } diff --git a/src/Formats/NumpyDataTypes.h b/src/Formats/NumpyDataTypes.h index 062f743c0ea..6ccdf65f457 100644 --- a/src/Formats/NumpyDataTypes.h +++ b/src/Formats/NumpyDataTypes.h @@ -2,6 +2,7 @@ #include #include #include +#include namespace ErrorCodes { diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 735b536986d..617595c19a2 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 31faea2e13e..3c374ada9e6 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -225,19 +225,6 @@ namespace Paths paths; }; - bool checkIfTypesAreEqual(const DataTypes & types) - { - if (types.empty()) - return true; - - for (size_t i = 1; i < types.size(); ++i) - { - if (!types[0]->equals(*types[i])) - return false; - } - return true; - } - void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes) { type_indexes.clear(); @@ -272,24 +259,31 @@ namespace type_indexes.erase(TypeIndex::Nothing); } - /// If we have both Int64 and UInt64, convert all Int64 to UInt64, + /// If we have both Int64 and UInt64, convert all not-negative Int64 to UInt64, /// because UInt64 is inferred only in case of Int64 overflow. - void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes) + void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) { if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::UInt64)) return; + bool have_negative_integers = false; for (auto & type : data_types) { if (WhichDataType(type).isInt64()) - type = std::make_shared(); + { + bool is_negative = json_info && json_info->negative_integers.contains(type.get()); + have_negative_integers |= is_negative; + if (!is_negative) + type = std::make_shared(); + } } - type_indexes.erase(TypeIndex::Int64); + if (!have_negative_integers) + type_indexes.erase(TypeIndex::Int64); } /// If we have both Int64 and Float64 types, convert all Int64 to Float64. - void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes) + void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) { bool have_floats = type_indexes.contains(TypeIndex::Float64); bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64); @@ -300,7 +294,12 @@ namespace { WhichDataType which(type); if (which.isInt64() || which.isUInt64()) - type = std::make_shared(); + { + auto new_type = std::make_shared(); + if (json_info && json_info->numbers_parsed_from_json_strings.erase(type.get())) + json_info->numbers_parsed_from_json_strings.insert(new_type.get()); + type = new_type; + } } type_indexes.erase(TypeIndex::Int64); @@ -635,9 +634,9 @@ namespace if (settings.try_infer_integers) { /// Transform Int64 to UInt64 if needed. - transformIntegers(data_types, type_indexes); + transformIntegers(data_types, type_indexes, json_info); /// Transform integers to floats if needed. - transformIntegersAndFloatsToFloats(data_types, type_indexes); + transformIntegersAndFloatsToFloats(data_types, type_indexes, json_info); } /// Transform Date to DateTime or both to String if needed. @@ -887,7 +886,7 @@ namespace } template - DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings) + DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info) { if (buf.eof()) return nullptr; @@ -911,7 +910,12 @@ namespace Int64 tmp_int; buf.position() = number_start; if (tryReadIntText(tmp_int, buf)) - return std::make_shared(); + { + auto type = std::make_shared(); + if (json_info && tmp_int < 0) + json_info->negative_integers.insert(type.get()); + return type; + } /// In case of Int64 overflow we can try to infer UInt64. UInt64 tmp_uint; @@ -934,7 +938,12 @@ namespace Int64 tmp_int; if (tryReadIntText(tmp_int, peekable_buf)) - return std::make_shared(); + { + auto type = std::make_shared(); + if (json_info && tmp_int < 0) + json_info->negative_integers.insert(type.get()); + return type; + } peekable_buf.rollbackToCheckpoint(/* drop= */ true); /// In case of Int64 overflow we can try to infer UInt64. @@ -952,7 +961,7 @@ namespace } template - DataTypePtr tryInferNumberFromStringImpl(std::string_view field, const FormatSettings & settings) + DataTypePtr tryInferNumberFromStringImpl(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_inference_info = nullptr) { ReadBufferFromString buf(field); @@ -960,7 +969,12 @@ namespace { Int64 tmp_int; if (tryReadIntText(tmp_int, buf) && buf.eof()) - return std::make_shared(); + { + auto type = std::make_shared(); + if (json_inference_info && tmp_int < 0) + json_inference_info->negative_integers.insert(type.get()); + return type; + } /// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof. buf.position() = buf.buffer().begin(); @@ -1011,7 +1025,7 @@ namespace { if (settings.json.try_infer_numbers_from_strings) { - if (auto number_type = tryInferNumberFromStringImpl(field, settings)) + if (auto number_type = tryInferNumberFromStringImpl(field, settings, json_info)) { json_info->numbers_parsed_from_json_strings.insert(number_type.get()); return number_type; @@ -1254,10 +1268,23 @@ namespace } /// Number - return tryInferNumber(buf, settings); + return tryInferNumber(buf, settings, json_info); } } +bool checkIfTypesAreEqual(const DataTypes & types) +{ + if (types.empty()) + return true; + + for (size_t i = 1; i < types.size(); ++i) + { + if (!types[0]->equals(*types[i])) + return false; + } + return true; +} + void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings) { DataTypes types = {first, second}; @@ -1275,6 +1302,11 @@ void transformInferredJSONTypesIfNeeded( second = std::move(types[1]); } +void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info) +{ + transformInferredTypesIfNeededImpl(types, settings, json_info); +} + void transformInferredJSONTypesFromDifferentFilesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings) { JSONInferenceInfo json_info; @@ -1396,6 +1428,12 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting return tryInferNumberFromStringImpl(field, settings); } +DataTypePtr tryInferJSONNumberFromString(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info) +{ + return tryInferNumberFromStringImpl(field, settings, json_info); + +} + DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const FormatSettings & settings) { if (settings.try_infer_dates && tryInferDate(field)) diff --git a/src/Formats/SchemaInferenceUtils.h b/src/Formats/SchemaInferenceUtils.h index bcf3d194825..06c14c0797a 100644 --- a/src/Formats/SchemaInferenceUtils.h +++ b/src/Formats/SchemaInferenceUtils.h @@ -2,6 +2,7 @@ #include #include +#include #include @@ -18,6 +19,11 @@ struct JSONInferenceInfo /// We store numbers that were parsed from strings. /// It's used in types transformation to change such numbers back to string if needed. std::unordered_set numbers_parsed_from_json_strings; + /// Store integer types that were inferred from negative numbers. + /// It's used to determine common type for Int64 and UInt64 + /// TODO: check it not only in JSON formats. + std::unordered_set negative_integers; + /// Indicates if currently we are inferring type for Map/Object key. bool is_object_key = false; /// When we transform types for the same column from different files @@ -48,6 +54,7 @@ DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const Forma /// Try to parse a number value from a string. By default, it tries to parse Float64, /// but if setting try_infer_integers is enabled, it also tries to parse Int64. DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSettings & settings); +DataTypePtr tryInferJSONNumberFromString(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info); /// It takes two types inferred for the same column and tries to transform them to a common type if possible. /// It's also used when we try to infer some not ordinary types from another types. @@ -77,6 +84,7 @@ void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, c /// Example 2: /// We merge DataTypeJSONPaths types to a single DataTypeJSONPaths type with union of all JSON paths. void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, JSONInferenceInfo * json_info); +void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info); /// Make final transform for types inferred in JSON format. It does 3 types of transformation: /// 1) Checks if type is unnamed Tuple(...), tries to transform nested types to find a common type for them and if all nested types @@ -107,4 +115,6 @@ NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header); /// Check if type contains Nothing, like Array(Tuple(Nullable(Nothing), String)) bool checkIfTypeIsComplete(const DataTypePtr & type); +bool checkIfTypesAreEqual(const DataTypes & types); + } diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index 0f54ff52ba2..10a08c6e35f 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index 083179c3ca8..363b9ee3a31 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -202,7 +202,7 @@ public: {"value", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_arguments); + validateFunctionArguments(*this, arguments, mandatory_arguments); return std::make_shared(); } diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 5d19ba44d9b..ee4e28a7858 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -1703,7 +1703,7 @@ public: { if constexpr (is_division) { - if (context->getSettingsRef().decimal_check_overflow) + if (decimalCheckArithmeticOverflow(context)) { /// Check overflow by using operands scale (based on big decimal division implementation details): /// big decimal arithmetic is based on big integers, decimal operands are converted to big integers diff --git a/src/Functions/FunctionBitTestMany.h b/src/Functions/FunctionBitTestMany.h index 71e94b1e71d..950e4ab4ea8 100644 --- a/src/Functions/FunctionBitTestMany.h +++ b/src/Functions/FunctionBitTestMany.h @@ -16,6 +16,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int PARAMETER_OUT_OF_BOUND; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; } @@ -146,6 +147,9 @@ private: const auto pos = pos_col_const->getUInt(0); if (pos < 8 * sizeof(ValueType)) mask = mask | (ValueType(1) << pos); + else + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument {} is out of bounds for number", static_cast(pos)); } else { @@ -186,13 +190,20 @@ private: for (const auto i : collections::range(0, mask.size())) if (pos[i] < 8 * sizeof(ValueType)) mask[i] = mask[i] | (ValueType(1) << pos[i]); + else + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument {} is out of bounds for number", static_cast(pos[i])); return true; } else if (const auto pos_col_const = checkAndGetColumnConst>(pos_col_untyped)) { const auto & pos = pos_col_const->template getValue(); - const auto new_mask = pos < 8 * sizeof(ValueType) ? ValueType(1) << pos : 0; + if (pos >= 8 * sizeof(ValueType)) + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument {} is out of bounds for number", static_cast(pos)); + + const auto new_mask = ValueType(1) << pos; for (const auto i : collections::range(0, mask.size())) mask[i] = mask[i] | new_mask; diff --git a/src/Functions/FunctionCustomWeekToDateOrDate32.h b/src/Functions/FunctionCustomWeekToDateOrDate32.h index c904bfaec30..6ae8c847b65 100644 --- a/src/Functions/FunctionCustomWeekToDateOrDate32.h +++ b/src/Functions/FunctionCustomWeekToDateOrDate32.h @@ -1,4 +1,5 @@ #pragma once +#include #include namespace DB diff --git a/src/Functions/FunctionDateOrDateTimeToDateOrDate32.h b/src/Functions/FunctionDateOrDateTimeToDateOrDate32.h index 6eb3e534b62..28caef36879 100644 --- a/src/Functions/FunctionDateOrDateTimeToDateOrDate32.h +++ b/src/Functions/FunctionDateOrDateTimeToDateOrDate32.h @@ -1,4 +1,5 @@ #pragma once +#include #include namespace DB diff --git a/src/Functions/FunctionDateOrDateTimeToDateTimeOrDateTime64.h b/src/Functions/FunctionDateOrDateTimeToDateTimeOrDateTime64.h index 9f1066fd687..d5a1f28b7cd 100644 --- a/src/Functions/FunctionDateOrDateTimeToDateTimeOrDateTime64.h +++ b/src/Functions/FunctionDateOrDateTimeToDateTimeOrDateTime64.h @@ -1,4 +1,6 @@ #pragma once + +#include #include namespace DB diff --git a/src/Functions/FunctionFactory.cpp b/src/Functions/FunctionFactory.cpp index 004ef745a93..de6d5a9e1c1 100644 --- a/src/Functions/FunctionFactory.cpp +++ b/src/Functions/FunctionFactory.cpp @@ -4,6 +4,7 @@ #include #include +#include #include diff --git a/src/Functions/FunctionGenerateRandomStructure.cpp b/src/Functions/FunctionGenerateRandomStructure.cpp index 6dc68134502..0c6f4287ecb 100644 --- a/src/Functions/FunctionGenerateRandomStructure.cpp +++ b/src/Functions/FunctionGenerateRandomStructure.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -354,6 +355,11 @@ namespace } } + FunctionPtr FunctionGenerateRandomStructure::create(DB::ContextPtr context) + { + return std::make_shared(context->getSettingsRef().allow_suspicious_low_cardinality_types.value); + } + DataTypePtr FunctionGenerateRandomStructure::getReturnTypeImpl(const DataTypes & arguments) const { if (arguments.size() > 2) diff --git a/src/Functions/FunctionGenerateRandomStructure.h b/src/Functions/FunctionGenerateRandomStructure.h index 894096a6e07..5901565696f 100644 --- a/src/Functions/FunctionGenerateRandomStructure.h +++ b/src/Functions/FunctionGenerateRandomStructure.h @@ -17,10 +17,7 @@ public: { } - static FunctionPtr create(ContextPtr context) - { - return std::make_shared(context->getSettingsRef().allow_suspicious_low_cardinality_types.value); - } + static FunctionPtr create(ContextPtr context); String getName() const override { return name; } diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 593646240ca..c658063b66f 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -95,22 +95,21 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName return res; } -void validateArgumentType(const IFunction & func, const DataTypes & arguments, - size_t argument_index, bool (* validator_func)(const IDataType &), - const char * expected_type_description) -{ - if (arguments.size() <= argument_index) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of arguments of function {}", - func.getName()); - - const auto & argument = arguments[argument_index]; - if (!validator_func(*argument)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {}, expected {}", - argument->getName(), std::to_string(argument_index), func.getName(), expected_type_description); -} - namespace { + +String withOrdinalEnding(size_t i) +{ + switch (i) + { + case 0: return "1st"; + case 1: return "2nd"; + case 2: return "3rd"; + default: return std::to_string(i) + "th"; + } + +} + void validateArgumentsImpl(const IFunction & func, const ColumnsWithTypeAndName & arguments, size_t argument_offset, @@ -120,20 +119,18 @@ void validateArgumentsImpl(const IFunction & func, { const auto argument_index = i + argument_offset; if (argument_index >= arguments.size()) - { break; - } const auto & arg = arguments[i + argument_offset]; const auto & descriptor = descriptors[i]; if (int error_code = descriptor.isValid(arg.type, arg.column); error_code != 0) throw Exception(error_code, - "Illegal type of argument #{}{} of function {}{}{}", - argument_offset + i + 1, // +1 is for human-friendly 1-based indexing - (descriptor.argument_name ? " '" + std::string(descriptor.argument_name) + "'" : String{}), + "A value of illegal type was provided as {} argument '{}' to function '{}'. Expected: {}, got: {}", + withOrdinalEnding(argument_offset + i), + descriptor.name, func.getName(), - (descriptor.expected_type_description ? String(", expected ") + descriptor.expected_type_description : String{}), - (arg.type ? ", got " + arg.type->getName() : String{})); + descriptor.type_name, + arg.type ? arg.type->getName() : ""); } } @@ -141,52 +138,42 @@ void validateArgumentsImpl(const IFunction & func, int FunctionArgumentDescriptor::isValid(const DataTypePtr & data_type, const ColumnPtr & column) const { - if (type_validator_func && (data_type == nullptr || !type_validator_func(*data_type))) + if (name.empty() || type_name.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "name or type_name are not set"); + + if (type_validator && (data_type == nullptr || !type_validator(*data_type))) return ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT; - if (column_validator_func && (column == nullptr || !column_validator_func(*column))) + if (column_validator && (column == nullptr || !column_validator(*column))) return ErrorCodes::ILLEGAL_COLUMN; return 0; } -void validateFunctionArgumentTypes(const IFunction & func, - const ColumnsWithTypeAndName & arguments, - const FunctionArgumentDescriptors & mandatory_args, - const FunctionArgumentDescriptors & optional_args) +void validateFunctionArguments(const IFunction & func, + const ColumnsWithTypeAndName & arguments, + const FunctionArgumentDescriptors & mandatory_args, + const FunctionArgumentDescriptors & optional_args) { if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size()) { - auto join_argument_types = [](const auto & args, const String sep = ", ") - { - String result; - for (const auto & a : args) - { - using A = std::decay_t; - if constexpr (std::is_same_v) - { - if (a.argument_name) - result += "'" + std::string(a.argument_name) + "' : "; - if (a.expected_type_description) - result += a.expected_type_description; - } - else if constexpr (std::is_same_v) - result += a.type->getName(); + auto argument_singular_or_plural = [](const auto & args) -> std::string_view { return args.size() == 1 ? "argument" : "arguments"; }; - result += sep; - } - - if (!args.empty()) - result.erase(result.end() - sep.length(), result.end()); - - return result; - }; + String expected_args_string; + if (!mandatory_args.empty() && !optional_args.empty()) + expected_args_string = fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args)); + else if (!mandatory_args.empty() && optional_args.empty()) + expected_args_string = fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)); /// intentionally not "_mandatory_ arguments" + else if (mandatory_args.empty() && !optional_args.empty()) + expected_args_string = fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args)); + else + expected_args_string = "0 arguments"; throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Incorrect number of arguments for function {} provided {}{}, expected {}{} ({}{})", - func.getName(), arguments.size(), (!arguments.empty() ? " (" + join_argument_types(arguments) + ")" : String{}), - mandatory_args.size(), (!optional_args.empty() ? " to " + std::to_string(mandatory_args.size() + optional_args.size()) : ""), - join_argument_types(mandatory_args), (!optional_args.empty() ? ", [" + join_argument_types(optional_args) + "]" : "")); + "An incorrect number of arguments was specified for function '{}'. Expected {}, got {}", + func.getName(), + expected_args_string, + fmt::format("{} {}", arguments.size(), argument_singular_or_plural(arguments))); } validateArgumentsImpl(func, arguments, 0, mandatory_args); diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index 6267d8eacc4..4f93b236bcb 100644 --- a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -115,77 +115,58 @@ ColumnWithTypeAndName columnGetNested(const ColumnWithTypeAndName & col); /// column if it is nullable. ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName & columns); -/// Checks argument type at specified index with predicate. -/// throws if there is no argument at specified index or if predicate returns false. -void validateArgumentType(const IFunction & func, const DataTypes & arguments, - size_t argument_index, bool (* validator_func)(const IDataType &), - const char * expected_type_description); - -/** Simple validator that is used in conjunction with validateFunctionArgumentTypes() to check if function arguments are as expected - * - * Also it is used to generate function description when arguments do not match expected ones. - * Any field can be null: - * `argument_name` - if not null, reported via type check errors. - * `expected_type_description` - if not null, reported via type check errors. - * `type_validator_func` - if not null, used to validate data type of function argument. - * `column_validator_func` - if not null, used to validate column of function argument. - */ +/// Expected arguments for a function. Can be used in conjunction with validateFunctionArguments() to check that the user-provided +/// arguments match the expected arguments. struct FunctionArgumentDescriptor { - const char * argument_name; + /// The argument name, e.g. "longitude". + /// Should not be empty. + std::string_view name; + /// A function which validates the argument data type. + /// May be nullptr. using TypeValidator = bool (*)(const IDataType &); - TypeValidator type_validator_func; + TypeValidator type_validator; + + /// A function which validates the argument column. + /// May be nullptr. using ColumnValidator = bool (*)(const IColumn &); - ColumnValidator column_validator_func; + ColumnValidator column_validator; - const char * expected_type_description; + /// The expected argument type, e.g. "const String" or "UInt64". + /// Should not be empty. + std::string_view type_name; - /** Validate argument type and column. - * - * Returns non-zero error code if: - * Validator != nullptr && (Value == nullptr || Validator(*Value) == false) - * For: - * Validator is either `type_validator_func` or `column_validator_func` - * Value is either `data_type` or `column` respectively. - * ILLEGAL_TYPE_OF_ARGUMENT if type validation fails - * - */ + /// Validate argument type and column. int isValid(const DataTypePtr & data_type, const ColumnPtr & column) const; }; using FunctionArgumentDescriptors = std::vector; -/** Validate that function arguments match specification. - * - * Designed to simplify argument validation for functions with variable arguments - * (e.g. depending on result type or other trait). - * First, checks that number of arguments is as expected (including optional arguments). - * Second, checks that mandatory args present and have valid type. - * Third, checks optional arguments types, skipping ones that are missing. - * - * Please note that if you have several optional arguments, like f([a, b, c]), - * only these calls are considered valid: - * f(a) - * f(a, b) - * f(a, b, c) - * - * But NOT these: f(a, c), f(b, c) - * In other words you can't omit middle optional arguments (just like in regular C++). - * - * If any mandatory arg is missing, throw an exception, with explicit description of expected arguments. - */ -void validateFunctionArgumentTypes(const IFunction & func, const ColumnsWithTypeAndName & arguments, - const FunctionArgumentDescriptors & mandatory_args, - const FunctionArgumentDescriptors & optional_args = {}); +/// Validates that the user-provided arguments match the expected arguments. +/// +/// Checks that +/// - the number of provided arguments matches the number of mandatory/optional arguments, +/// - all mandatory arguments are present and have the right type, +/// - optional arguments - if present - have the right type. +/// +/// With multiple optional arguments, e.g. f([a, b, c]), provided arguments must match left-to-right. E.g. these calls are considered valid: +/// f(a) +/// f(a, b) +/// f(a, b, c) +/// but these are NOT: +/// f(a, c) +/// f(b, c) +void validateFunctionArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments, + const FunctionArgumentDescriptors & mandatory_args, + const FunctionArgumentDescriptors & optional_args = {}); /// Checks if a list of array columns have equal offsets. Return a pair of nested columns and offsets if true, otherwise throw. std::pair, const ColumnArray::Offset *> checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments); -/** Return ColumnNullable of src, with null map as OR-ed null maps of args columns. - * Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. - */ +/// Return ColumnNullable of src, with null map as OR-ed null maps of args columns. +/// Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count); struct NullPresence diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp index 0a2859fe864..a67f3a977db 100644 --- a/src/Functions/FunctionJoinGet.cpp +++ b/src/Functions/FunctionJoinGet.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Functions/FunctionStringReplace.h b/src/Functions/FunctionStringReplace.h index aee04a5969a..b4bcfa514a8 100644 --- a/src/Functions/FunctionStringReplace.h +++ b/src/Functions/FunctionStringReplace.h @@ -40,7 +40,7 @@ public: {"replacement", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/FunctionTokens.h b/src/Functions/FunctionTokens.h index d6cf6a24983..f1435ca5651 100644 --- a/src/Functions/FunctionTokens.h +++ b/src/Functions/FunctionTokens.h @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB @@ -194,7 +195,7 @@ static inline void checkArgumentsWithSeparatorAndOptionalMaxSubstrings( {"max_substrings", static_cast(&isNativeInteger), isColumnConst, "const Number"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); + validateFunctionArguments(func, arguments, mandatory_args, optional_args); } static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & func, const ColumnsWithTypeAndName & arguments) @@ -207,7 +208,7 @@ static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & fun {"max_substrings", static_cast(&isNativeInteger), isColumnConst, "const Number"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); + validateFunctionArguments(func, arguments, mandatory_args, optional_args); } } diff --git a/src/Functions/FunctionUnixTimestamp64.cpp b/src/Functions/FunctionUnixTimestamp64.cpp new file mode 100644 index 00000000000..a22c035a0ba --- /dev/null +++ b/src/Functions/FunctionUnixTimestamp64.cpp @@ -0,0 +1,13 @@ +#include +#include + +namespace DB +{ + +FunctionFromUnixTimestamp64::FunctionFromUnixTimestamp64(size_t target_scale_, const char * name_, ContextPtr context) + : target_scale(target_scale_) + , name(name_) + , allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) +{} + +} diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index c418163343b..bbd7e586a67 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -47,7 +47,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -107,11 +107,7 @@ private: const bool allow_nonconst_timezone_arguments; public: - FunctionFromUnixTimestamp64(size_t target_scale_, const char * name_, ContextPtr context) - : target_scale(target_scale_) - , name(name_) - , allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) - {} + FunctionFromUnixTimestamp64(size_t target_scale_, const char * name_, ContextPtr context); String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 524b4f82acd..7af6265eba9 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -165,7 +165,7 @@ private: }); } - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{ {"mode", static_cast(&isStringOrFixedString), isColumnConst, "encryption mode string"}, {"input", static_cast(&isStringOrFixedString), {}, "plaintext"}, @@ -438,7 +438,7 @@ private: }); } - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{ {"mode", static_cast(&isStringOrFixedString), isColumnConst, "decryption mode string"}, {"input", static_cast(&isStringOrFixedString), {}, "ciphertext"}, diff --git a/src/Functions/FunctionsBinaryRepresentation.cpp b/src/Functions/FunctionsBinaryRepresentation.cpp index 0f3f8be96a7..ab10d402df4 100644 --- a/src/Functions/FunctionsBinaryRepresentation.cpp +++ b/src/Functions/FunctionsBinaryRepresentation.cpp @@ -3,14 +3,14 @@ #include #include #include -#include -#include #include #include #include #include #include #include +#include +#include namespace DB { @@ -218,10 +218,7 @@ struct UnbinImpl static constexpr auto name = "unbin"; static constexpr size_t word_size = 8; - static void decode(const char * pos, const char * end, char *& out) - { - binStringDecode(pos, end, out); - } + static void decode(const char * pos, const char * end, char *& out) { binStringDecode(pos, end, out, word_size); } }; /// Encode number or string to string with binary or hexadecimal representation @@ -651,7 +648,15 @@ public: size_t size = in_offsets.size(); out_offsets.resize(size); - out_vec.resize(in_vec.size() / word_size + size); + + size_t max_out_len = 0; + for (size_t i = 0; i < in_offsets.size(); ++i) + { + const size_t len = in_offsets[i] - (i == 0 ? 0 : in_offsets[i - 1]) + - /* trailing zero symbol that is always added in ColumnString and that is ignored while decoding */ 1; + max_out_len += (len + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1; + } + out_vec.resize(max_out_len); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; @@ -661,6 +666,7 @@ public: { size_t new_offset = in_offsets[i]; + /// `new_offset - 1` because in ColumnString each string is stored with trailing zero byte Impl::decode(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset - 1]), pos); out_offsets[i] = pos - begin; @@ -668,6 +674,9 @@ public: prev_offset = new_offset; } + chassert( + static_cast(pos - begin) <= out_vec.size(), + fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size())); out_vec.resize(pos - begin); return col_res; @@ -680,11 +689,11 @@ public: ColumnString::Offsets & out_offsets = col_res->getOffsets(); const ColumnString::Chars & in_vec = col_fix_string->getChars(); - size_t n = col_fix_string->getN(); + const size_t n = col_fix_string->getN(); size_t size = col_fix_string->size(); out_offsets.resize(size); - out_vec.resize(in_vec.size() / word_size + size); + out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * size); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; @@ -694,6 +703,7 @@ public: { size_t new_offset = prev_offset + n; + /// here we don't subtract 1 from `new_offset` because in ColumnFixedString strings are stored without trailing zero byte Impl::decode(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset]), pos); out_offsets[i] = pos - begin; @@ -701,6 +711,9 @@ public: prev_offset = new_offset; } + chassert( + static_cast(pos - begin) <= out_vec.size(), + fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size())); out_vec.resize(pos - begin); return col_res; diff --git a/src/Functions/FunctionsBitToArray.cpp b/src/Functions/FunctionsBitToArray.cpp index adabda1a7f8..beaaccad6db 100644 --- a/src/Functions/FunctionsBitToArray.cpp +++ b/src/Functions/FunctionsBitToArray.cpp @@ -284,7 +284,12 @@ public: { while (x) { - result_array_values_data.push_back(std::countr_zero(x)); + /// С++20 char8_t is not an unsigned integral type anymore https://godbolt.org/z/Mqcb7qn58 + /// and thus you cannot use std::countr_zero on it. + if constexpr (std::is_same_v) + result_array_values_data.push_back(std::countr_zero(static_cast(x))); + else + result_array_values_data.push_back(std::countr_zero(x)); x &= (x - 1); } } @@ -336,4 +341,3 @@ REGISTER_FUNCTION(BitToArray) } } - diff --git a/src/Functions/FunctionsCodingIP.cpp b/src/Functions/FunctionsCodingIP.cpp index e01967274f4..a134e39fbcd 100644 --- a/src/Functions/FunctionsCodingIP.cpp +++ b/src/Functions/FunctionsCodingIP.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 2a0b2f1d075..1342e3f2c5d 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -2020,7 +2021,7 @@ public: DataTypePtr getReturnTypeImplRemovedNullable(const ColumnsWithTypeAndName & arguments) const { - FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; + FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}}; FunctionArgumentDescriptors optional_args; if constexpr (to_decimal) @@ -2049,7 +2050,7 @@ public: optional_args.push_back({"timezone", static_cast(&isString), nullptr, "String"}); } - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); if constexpr (std::is_same_v) { @@ -2390,7 +2391,7 @@ public: if (isDateTime64(arguments)) { - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{{"string", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}}, // optional FunctionArgumentDescriptors{ diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index fbd987577e9..47040545677 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -1,10 +1,1064 @@ -#include +#include +#include + +#include + +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include #include +#include +#include +#include +#include + +#include + +#include "config.h" namespace DB { +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int ILLEGAL_COLUMN; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +template +concept HasIndexOperator = requires (T t) +{ + t[0]; +}; + +/// Functions to parse JSONs and extract values from it. +/// The first argument of all these functions gets a JSON, +/// after that there are any number of arguments specifying path to a desired part from the JSON's root. +/// For example, +/// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100 + +class FunctionJSONHelpers +{ +public: + template typename Impl, class JSONParser> + class Executor + { + public: + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, const FormatSettings & format_settings) + { + MutableColumnPtr to{result_type->createColumn()}; + to->reserve(input_rows_count); + + if (arguments.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", String(Name::name)); + + const auto & first_column = arguments[0]; + if (!isString(first_column.type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The first argument of function {} should be a string containing JSON, illegal type: " + "{}", String(Name::name), first_column.type->getName()); + + const ColumnPtr & arg_json = first_column.column; + const auto * col_json_const = typeid_cast(arg_json.get()); + const auto * col_json_string + = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); + + if (!col_json_string) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()); + + const ColumnString::Chars & chars = col_json_string->getChars(); + const ColumnString::Offsets & offsets = col_json_string->getOffsets(); + + size_t num_index_arguments = Impl::getNumberOfIndexArguments(arguments); + std::vector moves = prepareMoves(Name::name, arguments, 1, num_index_arguments); + + /// Preallocate memory in parser if necessary. + JSONParser parser; + if constexpr (has_member_function_reserve::value) + { + size_t max_size = calculateMaxSize(offsets); + if (max_size) + parser.reserve(max_size); + } + + Impl impl; + + /// prepare() does Impl-specific preparation before handling each row. + if constexpr (has_member_function_prepare::*)(const char *, const ColumnsWithTypeAndName &, const DataTypePtr &)>::value) + impl.prepare(Name::name, arguments, result_type); + + using Element = typename JSONParser::Element; + + Element document; + bool document_ok = false; + if (col_json_const) + { + std::string_view json{reinterpret_cast(chars.data()), offsets[0] - 1}; + document_ok = parser.parse(json, document); + } + + String error; + for (const auto i : collections::range(0, input_rows_count)) + { + if (!col_json_const) + { + std::string_view json{reinterpret_cast(&chars[offsets[i - 1]]), offsets[i] - offsets[i - 1] - 1}; + document_ok = parser.parse(json, document); + } + + bool added_to_column = false; + if (document_ok) + { + /// Perform moves. + Element element; + std::string_view last_key; + bool moves_ok = performMoves(arguments, i, document, moves, element, last_key); + + if (moves_ok) + added_to_column = impl.insertResultToColumn(*to, element, last_key, format_settings, error); + } + + /// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions. + if (!added_to_column) + to->insertDefault(); + } + return to; + } + }; + +private: + BOOST_TTI_HAS_MEMBER_FUNCTION(reserve) + BOOST_TTI_HAS_MEMBER_FUNCTION(prepare) + + /// Represents a move of a JSON iterator described by a single argument passed to a JSON function. + /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) + /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}. + /// Keys and indices can be nonconst, in this case they are calculated for each row. + enum class MoveType : uint8_t + { + Key, + Index, + ConstKey, + ConstIndex, + }; + + struct Move + { + explicit Move(MoveType type_, size_t index_ = 0) : type(type_), index(index_) {} + Move(MoveType type_, const String & key_) : type(type_), key(key_) {} + MoveType type; + size_t index = 0; + String key; + }; + + static std::vector prepareMoves( + const char * function_name, + const ColumnsWithTypeAndName & columns, + size_t first_index_argument, + size_t num_index_arguments) + { + std::vector moves; + moves.reserve(num_index_arguments); + for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments)) + { + const auto & column = columns[i]; + if (!isString(column.type) && !isNativeInteger(column.type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The argument {} of function {} should be a string specifying key " + "or an integer specifying index, illegal type: {}", + std::to_string(i + 1), String(function_name), column.type->getName()); + + if (column.column && isColumnConst(*column.column)) + { + const auto & column_const = assert_cast(*column.column); + if (isString(column.type)) + moves.emplace_back(MoveType::ConstKey, column_const.getValue()); + else + moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0)); + } + else + { + if (isString(column.type)) + moves.emplace_back(MoveType::Key, ""); + else + moves.emplace_back(MoveType::Index, 0); + } + } + return moves; + } + + + /// Performs moves of types MoveType::Index and MoveType::ConstIndex. + template + static bool performMoves(const ColumnsWithTypeAndName & arguments, size_t row, + const typename JSONParser::Element & document, const std::vector & moves, + typename JSONParser::Element & element, std::string_view & last_key) + { + typename JSONParser::Element res_element = document; + std::string_view key; + + for (size_t j = 0; j != moves.size(); ++j) + { + switch (moves[j].type) + { + case MoveType::ConstIndex: + { + if (!moveToElementByIndex(res_element, static_cast(moves[j].index), key)) + return false; + break; + } + case MoveType::ConstKey: + { + key = moves[j].key; + if (!moveToElementByKey(res_element, key)) + return false; + break; + } + case MoveType::Index: + { + Int64 index = (*arguments[j + 1].column)[row].get(); + if (!moveToElementByIndex(res_element, static_cast(index), key)) + return false; + break; + } + case MoveType::Key: + { + key = arguments[j + 1].column->getDataAt(row).toView(); + if (!moveToElementByKey(res_element, key)) + return false; + break; + } + } + } + + element = res_element; + last_key = key; + return true; + } + + template + static bool moveToElementByIndex(typename JSONParser::Element & element, int index, std::string_view & out_key) + { + if (element.isArray()) + { + auto array = element.getArray(); + if (index >= 0) + --index; + else + index += array.size(); + + if (static_cast(index) >= array.size()) + return false; + element = array[index]; + out_key = {}; + return true; + } + + if constexpr (HasIndexOperator) + { + if (element.isObject()) + { + auto object = element.getObject(); + if (index >= 0) + --index; + else + index += object.size(); + + if (static_cast(index) >= object.size()) + return false; + std::tie(out_key, element) = object[index]; + return true; + } + } + + return {}; + } + + /// Performs moves of types MoveType::Key and MoveType::ConstKey. + template + static bool moveToElementByKey(typename JSONParser::Element & element, std::string_view key) + { + if (!element.isObject()) + return false; + auto object = element.getObject(); + return object.find(key, element); + } + + static size_t calculateMaxSize(const ColumnString::Offsets & offsets) + { + size_t max_size = 0; + for (const auto i : collections::range(0, offsets.size())) + { + size_t size = offsets[i] - offsets[i - 1]; + max_size = std::max(max_size, size); + } + if (max_size) + --max_size; + return max_size; + } + +}; + +template +class JSONExtractImpl; + +template +class JSONExtractKeysAndValuesImpl; + +/** +* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it is specified in the last argument. +* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` +* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of +* input arguments. +* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - +* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality +* if needed. +*/ +template typename Impl> +constexpr bool functionForcesTheReturnType() +{ + return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; +} + +template typename Impl> +class ExecutableFunctionJSON : public IExecutableFunction +{ + +public: + explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_, const FormatSettings & format_settings_) + : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_), format_settings(format_settings_) + { + /// Don't escape forward slashes during converting JSON elements to raw string. + format_settings.json.escape_forward_slashes = false; + /// Don't insert default values on null during traversing the JSON element. + /// We allow to insert null only to Nullable columns in JSONExtract functions. + format_settings.null_as_default = false; + } + + String getName() const override { return Name::name; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override + { + return !functionForcesTheReturnType(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + if (null_presence.has_null_constant) + return result_type->createColumnConstWithDefaultValue(input_rows_count); + + if constexpr (functionForcesTheReturnType()) + { + ColumnsWithTypeAndName columns_without_low_cardinality = arguments; + + for (auto & column : columns_without_low_cardinality) + { + column.column = recursiveRemoveLowCardinality(column.column); + column.type = recursiveRemoveLowCardinality(column.type); + } + + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + + if (null_presence.has_nullable) + temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); + + if (result_type->lowCardinality()) + temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); + + return temporary_result; + } + else + { + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + + if (null_presence.has_nullable) + temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); + + if (result_type->lowCardinality()) + temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); + + return temporary_result; + } + } + +private: + + ColumnPtr + chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { +#if USE_SIMDJSON + if (allow_simdjson) + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count, format_settings); +#endif + +#if USE_RAPIDJSON + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count, format_settings); +#else + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count, format_settings); +#endif + } + + NullPresence null_presence; + bool allow_simdjson; + DataTypePtr json_return_type; + FormatSettings format_settings; +}; + + +template typename Impl> +class FunctionBaseFunctionJSON : public IFunctionBase +{ +public: + explicit FunctionBaseFunctionJSON( + const NullPresence & null_presence_, + bool allow_simdjson_, + DataTypes argument_types_, + DataTypePtr return_type_, + DataTypePtr json_return_type_, + const FormatSettings & format_settings_) + : null_presence(null_presence_) + , allow_simdjson(allow_simdjson_) + , argument_types(std::move(argument_types_)) + , return_type(std::move(return_type_)) + , json_return_type(std::move(json_return_type_)) + , format_settings(format_settings_) + { + } + + String getName() const override { return Name::name; } + + const DataTypes & getArgumentTypes() const override + { + return argument_types; + } + + const DataTypePtr & getResultType() const override + { + return return_type; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override + { + return std::make_unique>(null_presence, allow_simdjson, json_return_type, format_settings); + } + +private: + NullPresence null_presence; + bool allow_simdjson; + DataTypes argument_types; + DataTypePtr return_type; + DataTypePtr json_return_type; + FormatSettings format_settings; +}; + +/// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. +/// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. +template typename Impl> +class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext +{ +public: + static constexpr auto name = Name::name; + + String getName() const override { return name; } + + static FunctionOverloadResolverPtr create(ContextPtr context_) + { + return std::make_unique(context_); + } + + explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {} + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override + { + return !functionForcesTheReturnType(); + } + + FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override + { + bool has_nothing_argument = false; + for (const auto & arg : arguments) + has_nothing_argument |= isNothing(arg.type); + + DataTypePtr json_return_type = Impl::getReturnType(Name::name, createBlockWithNestedColumns(arguments)); + NullPresence null_presence = getNullPresense(arguments); + DataTypePtr return_type; + if (has_nothing_argument) + return_type = std::make_shared(); + else if (null_presence.has_null_constant) + return_type = makeNullable(std::make_shared()); + else if (null_presence.has_nullable) + return_type = makeNullable(json_return_type); + else + return_type = json_return_type; + + DataTypes argument_types; + argument_types.reserve(arguments.size()); + for (const auto & argument : arguments) + argument_types.emplace_back(argument.type); + return std::make_unique>( + null_presence, getContext()->getSettingsRef().allow_simdjson, argument_types, return_type, json_return_type, getFormatSettings(getContext())); + } +}; + +struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; +struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; +struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; +struct NameJSONKey { static constexpr auto name{"JSONKey"}; }; +struct NameJSONType { static constexpr auto name{"JSONType"}; }; +struct NameJSONExtractInt { static constexpr auto name{"JSONExtractInt"}; }; +struct NameJSONExtractUInt { static constexpr auto name{"JSONExtractUInt"}; }; +struct NameJSONExtractFloat { static constexpr auto name{"JSONExtractFloat"}; }; +struct NameJSONExtractBool { static constexpr auto name{"JSONExtractBool"}; }; +struct NameJSONExtractString { static constexpr auto name{"JSONExtractString"}; }; +struct NameJSONExtract { static constexpr auto name{"JSONExtract"}; }; +struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKeysAndValues"}; }; +struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; +struct NameJSONExtractArrayRaw { static constexpr auto name{"JSONExtractArrayRaw"}; }; +struct NameJSONExtractKeysAndValuesRaw { static constexpr auto name{"JSONExtractKeysAndValuesRaw"}; }; +struct NameJSONExtractKeys { static constexpr auto name{"JSONExtractKeys"}; }; + + +template +class JSONHasImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view, const FormatSettings &, String &) + { + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(1); + return true; + } +}; + + +template +class IsValidJSONImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() != 1) + { + /// IsValidJSON() shouldn't get parameters other than JSON. + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument", + String(function_name)); + } + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName &) { return 0; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view, const FormatSettings &, String &) + { + /// This function is called only if JSON is valid. + /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(1); + return true; + } +}; + + +template +class JSONLengthImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + size_t size; + if (element.isArray()) + size = element.getArray().size(); + else if (element.isObject()) + size = element.getObject().size(); + else + return false; + + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(size); + return true; + } +}; + + +template +class JSONKeyImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view last_key, const FormatSettings &, String &) + { + if (last_key.empty()) + return false; + ColumnString & col_str = assert_cast(dest); + col_str.insertData(last_key.data(), last_key.size()); + return true; + } +}; + + +template +class JSONTypeImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + static const std::vector> values = { + {"Array", '['}, + {"Object", '{'}, + {"String", '"'}, + {"Int64", 'i'}, + {"UInt64", 'u'}, + {"Double", 'd'}, + {"Bool", 'b'}, + {"Null", 0}, /// the default value for the column. + }; + return std::make_shared>(values); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + UInt8 type; + switch (element.type()) + { + case ElementType::INT64: + type = 'i'; + break; + case ElementType::UINT64: + type = 'u'; + break; + case ElementType::DOUBLE: + type = 'd'; + break; + case ElementType::STRING: + type = '"'; + break; + case ElementType::ARRAY: + type = '['; + break; + case ElementType::OBJECT: + type = '{'; + break; + case ElementType::BOOL: + type = 'b'; + break; + case ElementType::NULL_VALUE: + type = 0; + break; + } + + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(type); + return true; + } +}; + + +template +class JSONExtractNumericImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared>(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static const std::unique_ptr> & getInsertNode() + { + static const std::unique_ptr> node = buildJSONExtractTree(std::make_shared>()); + } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String & error) + { + NumberType value; + + tryGetNumericValueFromJSONElement(value, element, convert_bool_to_integer, error); + auto & col_vec = assert_cast &>(dest); + col_vec.insertValue(value); + return true; + } +}; + + +template +using JSONExtractInt64Impl = JSONExtractNumericImpl; +template +using JSONExtractUInt64Impl = JSONExtractNumericImpl; +template +using JSONExtractFloat64Impl = JSONExtractNumericImpl; + + +template +class JSONExtractBoolImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + bool value; + switch (element.type()) + { + case ElementType::BOOL: + value = element.getBool(); + break; + case ElementType::INT64: + value = element.getInt64() != 0; + break; + case ElementType::UINT64: + value = element.getUInt64() != 0; + break; + default: + return false; + } + + auto & col_vec = assert_cast &>(dest); + col_vec.insertValue(static_cast(value)); + return true; + } +}; + +template +class JSONExtractRawImpl; + +template +class JSONExtractStringImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (element.isNull()) + return false; + + if (!element.isString()) + return JSONExtractRawImpl::insertResultToColumn(dest, element, {}, format_settings, error); + + auto str = element.getString(); + ColumnString & col_str = assert_cast(dest); + col_str.insertData(str.data(), str.size()); + return true; + } +}; + +template +class JSONExtractImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); + + const auto & col = arguments.back(); + const auto * col_type_const = typeid_cast(col.column.get()); + if (!col_type_const || !isString(col.type)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "The last argument of function {} should " + "be a constant string specifying the return data type, illegal value: {}", + String(function_name), col.name); + + return DataTypeFactory::instance().get(col_type_const->getValue()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } + + void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) + { + extract_tree = buildJSONExtractTree(result_type, function_name); + insert_settings.insert_default_on_invalid_elements_in_complex_types = true; + } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + return extract_tree->insertResultToColumn(dest, element, insert_settings, format_settings, error); + } + +protected: + std::unique_ptr> extract_tree; + JSONExtractInsertSettings insert_settings; +}; + + +template +class JSONExtractKeysAndValuesImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); + + const auto & col = arguments.back(); + const auto * col_type_const = typeid_cast(col.column.get()); + if (!col_type_const || !isString(col.type)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "The last argument of function {} should " + "be a constant string specifying the values' data type, illegal value: {}", + String(function_name), col.name); + + DataTypePtr key_type = std::make_unique(); + DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue()); + DataTypePtr tuple_type = std::make_unique(DataTypes{key_type, value_type}); + return std::make_unique(tuple_type); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } + + void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) + { + const auto tuple_type = typeid_cast(result_type.get())->getNestedType(); + const auto value_type = typeid_cast(tuple_type.get())->getElements()[1]; + extract_tree = buildJSONExtractTree(value_type, function_name); + insert_settings.insert_default_on_invalid_elements_in_complex_types = true; + } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + auto & col_arr = assert_cast(dest); + auto & col_tuple = assert_cast(col_arr.getData()); + size_t old_size = col_tuple.size(); + auto & col_key = assert_cast(col_tuple.getColumn(0)); + auto & col_value = col_tuple.getColumn(1); + + for (const auto & [key, value] : object) + { + if (extract_tree->insertResultToColumn(col_value, value, insert_settings, format_settings, error)) + col_key.insertData(key.data(), key.size()); + } + + if (col_tuple.size() == old_size) + return false; + + col_arr.getOffsets().push_back(col_tuple.size()); + return true; + } + +private: + std::unique_ptr> extract_tree; + JSONExtractInsertSettings insert_settings; +}; + + +template +class JSONExtractRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String &) + { + ColumnString & col_str = assert_cast(dest); + auto & chars = col_str.getChars(); + WriteBufferFromVector buf(chars, AppendModeTag()); + jsonElementToString(element, buf, format_settings); + buf.finalize(); + chars.push_back(0); + col_str.getOffsets().push_back(chars.size()); + return true; + } +}; + + +template +class JSONExtractArrayRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(std::make_shared()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (!element.isArray()) + return false; + + auto array = element.getArray(); + ColumnArray & col_res = assert_cast(dest); + + for (auto value : array) + JSONExtractRawImpl::insertResultToColumn(col_res.getData(), value, {}, format_settings, error); + + col_res.getOffsets().push_back(col_res.getOffsets().back() + array.size()); + return true; + } +}; + + +template +class JSONExtractKeysAndValuesRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + DataTypePtr string_type = std::make_unique(); + DataTypePtr tuple_type = std::make_unique(DataTypes{string_type, string_type}); + return std::make_unique(tuple_type); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + auto & col_arr = assert_cast(dest); + auto & col_tuple = assert_cast(col_arr.getData()); + auto & col_key = assert_cast(col_tuple.getColumn(0)); + auto & col_value = assert_cast(col_tuple.getColumn(1)); + + for (const auto & [key, value] : object) + { + col_key.insertData(key.data(), key.size()); + JSONExtractRawImpl::insertResultToColumn(col_value, value, {}, format_settings, error); + } + + col_arr.getOffsets().push_back(col_arr.getOffsets().back() + object.size()); + return true; + } +}; + +template +class JSONExtractKeysImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_unique(std::make_shared()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + ColumnArray & col_res = assert_cast(dest); + auto & col_key = assert_cast(col_res.getData()); + + for (const auto & [key, value] : object) + { + col_key.insertData(key.data(), key.size()); + } + + col_res.getOffsets().push_back(col_res.getOffsets().back() + object.size()); + return true; + } +}; + REGISTER_FUNCTION(JSON) { factory.registerFunction>(); diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h deleted file mode 100644 index 8a2ad457d34..00000000000 --- a/src/Functions/FunctionsJSON.h +++ /dev/null @@ -1,1781 +0,0 @@ -#pragma once - -#include -#include - -#include - -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - - -#include "config.h" - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int ILLEGAL_COLUMN; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - -template -concept HasIndexOperator = requires (T t) -{ - t[0]; -}; - -/// Functions to parse JSONs and extract values from it. -/// The first argument of all these functions gets a JSON, -/// after that there are any number of arguments specifying path to a desired part from the JSON's root. -/// For example, -/// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100 - -class FunctionJSONHelpers -{ -public: - template typename Impl, class JSONParser> - class Executor - { - public: - static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) - { - MutableColumnPtr to{result_type->createColumn()}; - to->reserve(input_rows_count); - - if (arguments.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", String(Name::name)); - - const auto & first_column = arguments[0]; - if (!isString(first_column.type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The first argument of function {} should be a string containing JSON, illegal type: " - "{}", String(Name::name), first_column.type->getName()); - - const ColumnPtr & arg_json = first_column.column; - const auto * col_json_const = typeid_cast(arg_json.get()); - const auto * col_json_string - = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); - - if (!col_json_string) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()); - - const ColumnString::Chars & chars = col_json_string->getChars(); - const ColumnString::Offsets & offsets = col_json_string->getOffsets(); - - size_t num_index_arguments = Impl::getNumberOfIndexArguments(arguments); - std::vector moves = prepareMoves(Name::name, arguments, 1, num_index_arguments); - - /// Preallocate memory in parser if necessary. - JSONParser parser; - if constexpr (has_member_function_reserve::value) - { - size_t max_size = calculateMaxSize(offsets); - if (max_size) - parser.reserve(max_size); - } - - Impl impl; - - /// prepare() does Impl-specific preparation before handling each row. - if constexpr (has_member_function_prepare::*)(const char *, const ColumnsWithTypeAndName &, const DataTypePtr &)>::value) - impl.prepare(Name::name, arguments, result_type); - - using Element = typename JSONParser::Element; - - Element document; - bool document_ok = false; - if (col_json_const) - { - std::string_view json{reinterpret_cast(chars.data()), offsets[0] - 1}; - document_ok = parser.parse(json, document); - } - - for (const auto i : collections::range(0, input_rows_count)) - { - if (!col_json_const) - { - std::string_view json{reinterpret_cast(&chars[offsets[i - 1]]), offsets[i] - offsets[i - 1] - 1}; - document_ok = parser.parse(json, document); - } - - bool added_to_column = false; - if (document_ok) - { - /// Perform moves. - Element element; - std::string_view last_key; - bool moves_ok = performMoves(arguments, i, document, moves, element, last_key); - - if (moves_ok) - added_to_column = impl.insertResultToColumn(*to, element, last_key); - } - - /// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions. - if (!added_to_column) - to->insertDefault(); - } - return to; - } - }; - -private: - BOOST_TTI_HAS_MEMBER_FUNCTION(reserve) - BOOST_TTI_HAS_MEMBER_FUNCTION(prepare) - - /// Represents a move of a JSON iterator described by a single argument passed to a JSON function. - /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) - /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}. - /// Keys and indices can be nonconst, in this case they are calculated for each row. - enum class MoveType : uint8_t - { - Key, - Index, - ConstKey, - ConstIndex, - }; - - struct Move - { - explicit Move(MoveType type_, size_t index_ = 0) : type(type_), index(index_) {} - Move(MoveType type_, const String & key_) : type(type_), key(key_) {} - MoveType type; - size_t index = 0; - String key; - }; - - static std::vector prepareMoves( - const char * function_name, - const ColumnsWithTypeAndName & columns, - size_t first_index_argument, - size_t num_index_arguments) - { - std::vector moves; - moves.reserve(num_index_arguments); - for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments)) - { - const auto & column = columns[i]; - if (!isString(column.type) && !isNativeInteger(column.type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The argument {} of function {} should be a string specifying key " - "or an integer specifying index, illegal type: {}", - std::to_string(i + 1), String(function_name), column.type->getName()); - - if (column.column && isColumnConst(*column.column)) - { - const auto & column_const = assert_cast(*column.column); - if (isString(column.type)) - moves.emplace_back(MoveType::ConstKey, column_const.getValue()); - else - moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0)); - } - else - { - if (isString(column.type)) - moves.emplace_back(MoveType::Key, ""); - else - moves.emplace_back(MoveType::Index, 0); - } - } - return moves; - } - - - /// Performs moves of types MoveType::Index and MoveType::ConstIndex. - template - static bool performMoves(const ColumnsWithTypeAndName & arguments, size_t row, - const typename JSONParser::Element & document, const std::vector & moves, - typename JSONParser::Element & element, std::string_view & last_key) - { - typename JSONParser::Element res_element = document; - std::string_view key; - - for (size_t j = 0; j != moves.size(); ++j) - { - switch (moves[j].type) - { - case MoveType::ConstIndex: - { - if (!moveToElementByIndex(res_element, static_cast(moves[j].index), key)) - return false; - break; - } - case MoveType::ConstKey: - { - key = moves[j].key; - if (!moveToElementByKey(res_element, key)) - return false; - break; - } - case MoveType::Index: - { - Int64 index = (*arguments[j + 1].column)[row].get(); - if (!moveToElementByIndex(res_element, static_cast(index), key)) - return false; - break; - } - case MoveType::Key: - { - key = arguments[j + 1].column->getDataAt(row).toView(); - if (!moveToElementByKey(res_element, key)) - return false; - break; - } - } - } - - element = res_element; - last_key = key; - return true; - } - - template - static bool moveToElementByIndex(typename JSONParser::Element & element, int index, std::string_view & out_key) - { - if (element.isArray()) - { - auto array = element.getArray(); - if (index >= 0) - --index; - else - index += array.size(); - - if (static_cast(index) >= array.size()) - return false; - element = array[index]; - out_key = {}; - return true; - } - - if constexpr (HasIndexOperator) - { - if (element.isObject()) - { - auto object = element.getObject(); - if (index >= 0) - --index; - else - index += object.size(); - - if (static_cast(index) >= object.size()) - return false; - std::tie(out_key, element) = object[index]; - return true; - } - } - - return {}; - } - - /// Performs moves of types MoveType::Key and MoveType::ConstKey. - template - static bool moveToElementByKey(typename JSONParser::Element & element, std::string_view key) - { - if (!element.isObject()) - return false; - auto object = element.getObject(); - return object.find(key, element); - } - - static size_t calculateMaxSize(const ColumnString::Offsets & offsets) - { - size_t max_size = 0; - for (const auto i : collections::range(0, offsets.size())) - { - size_t size = offsets[i] - offsets[i - 1]; - max_size = std::max(max_size, size); - } - if (max_size) - --max_size; - return max_size; - } - -}; - -template -class JSONExtractImpl; - -template -class JSONExtractKeysAndValuesImpl; - -/** -* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it is specified in the last argument. -* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` -* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of -* input arguments. -* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - -* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality -* if needed. -*/ -template typename Impl> -constexpr bool functionForcesTheReturnType() -{ - return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; -} - -template typename Impl> -class ExecutableFunctionJSON : public IExecutableFunction -{ - -public: - explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_) - : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_) - { - } - - String getName() const override { return Name::name; } - bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForConstants() const override { return true; } - bool useDefaultImplementationForLowCardinalityColumns() const override - { - return !functionForcesTheReturnType(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - if (null_presence.has_null_constant) - return result_type->createColumnConstWithDefaultValue(input_rows_count); - - if constexpr (functionForcesTheReturnType()) - { - ColumnsWithTypeAndName columns_without_low_cardinality = arguments; - - for (auto & column : columns_without_low_cardinality) - { - column.column = recursiveRemoveLowCardinality(column.column); - column.type = recursiveRemoveLowCardinality(column.type); - } - - ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; - ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); - - if (null_presence.has_nullable) - temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); - - if (result_type->lowCardinality()) - temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); - - return temporary_result; - } - else - { - ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; - ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); - - if (null_presence.has_nullable) - temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); - - if (result_type->lowCardinality()) - temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); - - return temporary_result; - } - } - -private: - - ColumnPtr - chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const - { -#if USE_SIMDJSON - if (allow_simdjson) - return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -#endif - -#if USE_RAPIDJSON - return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -#else - return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -#endif - } - - NullPresence null_presence; - bool allow_simdjson; - DataTypePtr json_return_type; -}; - - -template typename Impl> -class FunctionBaseFunctionJSON : public IFunctionBase -{ -public: - explicit FunctionBaseFunctionJSON( - const NullPresence & null_presence_, - bool allow_simdjson_, - DataTypes argument_types_, - DataTypePtr return_type_, - DataTypePtr json_return_type_) - : null_presence(null_presence_) - , allow_simdjson(allow_simdjson_) - , argument_types(std::move(argument_types_)) - , return_type(std::move(return_type_)) - , json_return_type(std::move(json_return_type_)) - { - } - - String getName() const override { return Name::name; } - - const DataTypes & getArgumentTypes() const override - { - return argument_types; - } - - const DataTypePtr & getResultType() const override - { - return return_type; - } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override - { - return std::make_unique>(null_presence, allow_simdjson, json_return_type); - } - -private: - NullPresence null_presence; - bool allow_simdjson; - DataTypes argument_types; - DataTypePtr return_type; - DataTypePtr json_return_type; -}; - -/// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. -/// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. -template typename Impl> -class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext -{ -public: - static constexpr auto name = Name::name; - - String getName() const override { return name; } - - static FunctionOverloadResolverPtr create(ContextPtr context_) - { - return std::make_unique(context_); - } - - explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {} - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForLowCardinalityColumns() const override - { - return !functionForcesTheReturnType(); - } - - FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override - { - bool has_nothing_argument = false; - for (const auto & arg : arguments) - has_nothing_argument |= isNothing(arg.type); - - DataTypePtr json_return_type = Impl::getReturnType(Name::name, createBlockWithNestedColumns(arguments)); - NullPresence null_presence = getNullPresense(arguments); - DataTypePtr return_type; - if (has_nothing_argument) - return_type = std::make_shared(); - else if (null_presence.has_null_constant) - return_type = makeNullable(std::make_shared()); - else if (null_presence.has_nullable) - return_type = makeNullable(json_return_type); - else - return_type = json_return_type; - - /// Top-level LowCardinality columns are processed outside JSON parser. - json_return_type = removeLowCardinality(json_return_type); - - DataTypes argument_types; - argument_types.reserve(arguments.size()); - for (const auto & argument : arguments) - argument_types.emplace_back(argument.type); - return std::make_unique>( - null_presence, getContext()->getSettingsRef().allow_simdjson, argument_types, return_type, json_return_type); - } -}; - -struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; -struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; -struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; -struct NameJSONKey { static constexpr auto name{"JSONKey"}; }; -struct NameJSONType { static constexpr auto name{"JSONType"}; }; -struct NameJSONExtractInt { static constexpr auto name{"JSONExtractInt"}; }; -struct NameJSONExtractUInt { static constexpr auto name{"JSONExtractUInt"}; }; -struct NameJSONExtractFloat { static constexpr auto name{"JSONExtractFloat"}; }; -struct NameJSONExtractBool { static constexpr auto name{"JSONExtractBool"}; }; -struct NameJSONExtractString { static constexpr auto name{"JSONExtractString"}; }; -struct NameJSONExtract { static constexpr auto name{"JSONExtract"}; }; -struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKeysAndValues"}; }; -struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; -struct NameJSONExtractArrayRaw { static constexpr auto name{"JSONExtractArrayRaw"}; }; -struct NameJSONExtractKeysAndValuesRaw { static constexpr auto name{"JSONExtractKeysAndValuesRaw"}; }; -struct NameJSONExtractKeys { static constexpr auto name{"JSONExtractKeys"}; }; - - -template -class JSONHasImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) - { - ColumnVector & col_vec = assert_cast &>(dest); - col_vec.insertValue(1); - return true; - } -}; - - -template -class IsValidJSONImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) - { - if (arguments.size() != 1) - { - /// IsValidJSON() shouldn't get parameters other than JSON. - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument", - String(function_name)); - } - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName &) { return 0; } - - static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) - { - /// This function is called only if JSON is valid. - /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. - ColumnVector & col_vec = assert_cast &>(dest); - col_vec.insertValue(1); - return true; - } -}; - - -template -class JSONLengthImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - size_t size; - if (element.isArray()) - size = element.getArray().size(); - else if (element.isObject()) - size = element.getObject().size(); - else - return false; - - ColumnVector & col_vec = assert_cast &>(dest); - col_vec.insertValue(size); - return true; - } -}; - - -template -class JSONKeyImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view last_key) - { - if (last_key.empty()) - return false; - ColumnString & col_str = assert_cast(dest); - col_str.insertData(last_key.data(), last_key.size()); - return true; - } -}; - - -template -class JSONTypeImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - static const std::vector> values = { - {"Array", '['}, - {"Object", '{'}, - {"String", '"'}, - {"Int64", 'i'}, - {"UInt64", 'u'}, - {"Double", 'd'}, - {"Bool", 'b'}, - {"Null", 0}, /// the default value for the column. - }; - return std::make_shared>(values); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - UInt8 type; - switch (element.type()) - { - case ElementType::INT64: - type = 'i'; - break; - case ElementType::UINT64: - type = 'u'; - break; - case ElementType::DOUBLE: - type = 'd'; - break; - case ElementType::STRING: - type = '"'; - break; - case ElementType::ARRAY: - type = '['; - break; - case ElementType::OBJECT: - type = '{'; - break; - case ElementType::BOOL: - type = 'b'; - break; - case ElementType::NULL_VALUE: - type = 0; - break; - } - - ColumnVector & col_vec = assert_cast &>(dest); - col_vec.insertValue(type); - return true; - } -}; - - -template -class JSONExtractNumericImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared>(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - NumberType value; - - switch (element.type()) - { - case ElementType::DOUBLE: - if constexpr (std::is_floating_point_v) - { - /// We permit inaccurate conversion of double to float. - /// Example: double 0.1 from JSON is not representable in float. - /// But it will be more convenient for user to perform conversion. - value = static_cast(element.getDouble()); - } - else if (!accurate::convertNumeric(element.getDouble(), value)) - return false; - break; - case ElementType::UINT64: - if (!accurate::convertNumeric(element.getUInt64(), value)) - return false; - break; - case ElementType::INT64: - if (!accurate::convertNumeric(element.getInt64(), value)) - return false; - break; - case ElementType::BOOL: - if constexpr (is_integer && convert_bool_to_integer) - { - value = static_cast(element.getBool()); - break; - } - return false; - case ElementType::STRING: - { - auto rb = ReadBufferFromMemory{element.getString()}; - if constexpr (std::is_floating_point_v) - { - if (!tryReadFloatText(value, rb) || !rb.eof()) - return false; - } - else - { - if (tryReadIntText(value, rb) && rb.eof()) - break; - - /// Try to parse float and convert it to integer. - Float64 tmp_float; - rb.position() = rb.buffer().begin(); - if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) - return false; - - if (!accurate::convertNumeric(tmp_float, value)) - return false; - } - break; - } - default: - return false; - } - - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnLowCardinality & col_low = assert_cast(dest); - col_low.insertData(reinterpret_cast(&value), sizeof(value)); - } - else - { - auto & col_vec = assert_cast &>(dest); - col_vec.insertValue(value); - } - return true; - } -}; - - -template -using JSONExtractInt64Impl = JSONExtractNumericImpl; -template -using JSONExtractUInt64Impl = JSONExtractNumericImpl; -template -using JSONExtractFloat64Impl = JSONExtractNumericImpl; - - -template -class JSONExtractBoolImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - bool value; - switch (element.type()) - { - case ElementType::BOOL: - value = element.getBool(); - break; - case ElementType::INT64: - value = element.getInt64() != 0; - break; - case ElementType::UINT64: - value = element.getUInt64() != 0; - break; - default: - return false; - } - - auto & col_vec = assert_cast &>(dest); - col_vec.insertValue(static_cast(value)); - return true; - } -}; - -template -class JSONExtractRawImpl; - -template -class JSONExtractStringImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (element.isNull()) - return false; - - if (!element.isString()) - return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); - - auto str = element.getString(); - - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnLowCardinality & col_low = assert_cast(dest); - col_low.insertData(str.data(), str.size()); - } - else - { - ColumnString & col_str = assert_cast(dest); - col_str.insertData(str.data(), str.size()); - } - return true; - } -}; - -/// Nodes of the extract tree. We need the extract tree to extract from JSON complex values containing array, tuples or nullables. -template -struct JSONExtractTree -{ - using Element = typename JSONParser::Element; - - class Node - { - public: - Node() = default; - virtual ~Node() = default; - virtual bool insertResultToColumn(IColumn &, const Element &) = 0; - }; - - template - class NumericNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - return JSONExtractNumericImpl::insertResultToColumn(dest, element, {}); - } - }; - - class LowCardinalityFixedStringNode : public Node - { - public: - explicit LowCardinalityFixedStringNode(const size_t fixed_length_) : fixed_length(fixed_length_) { } - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - // If element is an object we delegate the insertion to JSONExtractRawImpl - if (element.isObject()) - return JSONExtractRawImpl::insertResultToLowCardinalityFixedStringColumn(dest, element, fixed_length); - else if (!element.isString()) - return false; - - auto str = element.getString(); - if (str.size() > fixed_length) - return false; - - // For the non low cardinality case of FixedString, the padding is done in the FixedString Column implementation. - // In order to avoid having to pass the data to a FixedString Column and read it back (which would slow down the execution) - // the data is padded here and written directly to the Low Cardinality Column - if (str.size() == fixed_length) - { - assert_cast(dest).insertData(str.data(), str.size()); - } - else - { - String padded_str(str); - padded_str.resize(fixed_length, '\0'); - - assert_cast(dest).insertData(padded_str.data(), padded_str.size()); - } - return true; - } - - private: - const size_t fixed_length; - }; - - class UUIDNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (!element.isString()) - return false; - - auto uuid = parseFromString(element.getString()); - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnLowCardinality & col_low = assert_cast(dest); - col_low.insertData(reinterpret_cast(&uuid), sizeof(uuid)); - } - else - { - assert_cast(dest).insert(uuid); - } - return true; - } - }; - - template - class DecimalNode : public Node - { - public: - explicit DecimalNode(DataTypePtr data_type_) : data_type(data_type_) {} - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - const auto * type = assert_cast *>(data_type.get()); - - DecimalType value{}; - - switch (element.type()) - { - case ElementType::DOUBLE: - value = convertToDecimal, DataTypeDecimal>( - element.getDouble(), type->getScale()); - break; - case ElementType::UINT64: - value = convertToDecimal, DataTypeDecimal>( - element.getUInt64(), type->getScale()); - break; - case ElementType::INT64: - value = convertToDecimal, DataTypeDecimal>( - element.getInt64(), type->getScale()); - break; - case ElementType::STRING: { - auto rb = ReadBufferFromMemory{element.getString()}; - if (!SerializationDecimal::tryReadText(value, rb, DecimalUtils::max_precision, type->getScale())) - return false; - break; - } - default: - return false; - } - - assert_cast &>(dest).insertValue(value); - return true; - } - - private: - DataTypePtr data_type; - }; - - class StringNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - return JSONExtractStringImpl::insertResultToColumn(dest, element, {}); - } - }; - - class FixedStringNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (element.isNull()) - return false; - - if (!element.isString()) - return JSONExtractRawImpl::insertResultToFixedStringColumn(dest, element, {}); - - auto str = element.getString(); - auto & col_str = assert_cast(dest); - if (str.size() > col_str.getN()) - return false; - col_str.insertData(str.data(), str.size()); - - return true; - } - }; - - template - class EnumNode : public Node - { - public: - explicit EnumNode(const std::vector> & name_value_pairs_) : name_value_pairs(name_value_pairs_) - { - for (const auto & name_value_pair : name_value_pairs) - { - name_to_value_map.emplace(name_value_pair.first, name_value_pair.second); - only_values.emplace(name_value_pair.second); - } - } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - auto & col_vec = assert_cast &>(dest); - - if (element.isInt64()) - { - Type value; - if (!accurate::convertNumeric(element.getInt64(), value) || !only_values.contains(value)) - return false; - col_vec.insertValue(value); - return true; - } - - if (element.isUInt64()) - { - Type value; - if (!accurate::convertNumeric(element.getUInt64(), value) || !only_values.contains(value)) - return false; - col_vec.insertValue(value); - return true; - } - - if (element.isString()) - { - auto value = name_to_value_map.find(element.getString()); - if (value == name_to_value_map.end()) - return false; - col_vec.insertValue(value->second); - return true; - } - - return false; - } - - private: - std::vector> name_value_pairs; - std::unordered_map name_to_value_map; - std::unordered_set only_values; - }; - - class NullableNode : public Node - { - public: - explicit NullableNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (dest.getDataType() == TypeIndex::LowCardinality) - { - /// We do not need to handle nullability in that case - /// because nested node handles LowCardinality columns and will call proper overload of `insertData` - return nested->insertResultToColumn(dest, element); - } - - ColumnNullable & col_null = assert_cast(dest); - if (!nested->insertResultToColumn(col_null.getNestedColumn(), element)) - return false; - col_null.getNullMapColumn().insertValue(0); - return true; - } - - private: - std::unique_ptr nested; - }; - - class ArrayNode : public Node - { - public: - explicit ArrayNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (!element.isArray()) - return false; - - auto array = element.getArray(); - - ColumnArray & col_arr = assert_cast(dest); - auto & data = col_arr.getData(); - size_t old_size = data.size(); - bool were_valid_elements = false; - - for (auto value : array) - { - if (nested->insertResultToColumn(data, value)) - were_valid_elements = true; - else - data.insertDefault(); - } - - if (!were_valid_elements) - { - data.popBack(data.size() - old_size); - return false; - } - - col_arr.getOffsets().push_back(data.size()); - return true; - } - - private: - std::unique_ptr nested; - }; - - class TupleNode : public Node - { - public: - TupleNode(std::vector> nested_, const std::vector & explicit_names_) : nested(std::move(nested_)), explicit_names(explicit_names_) - { - for (size_t i = 0; i != explicit_names.size(); ++i) - name_to_index_map.emplace(explicit_names[i], i); - } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - ColumnTuple & tuple = assert_cast(dest); - size_t old_size = dest.size(); - bool were_valid_elements = false; - - auto set_size = [&](size_t size) - { - for (size_t i = 0; i != tuple.tupleSize(); ++i) - { - auto & col = tuple.getColumn(i); - if (col.size() != size) - { - if (col.size() > size) - col.popBack(col.size() - size); - else - while (col.size() < size) - col.insertDefault(); - } - } - }; - - if (element.isArray()) - { - auto array = element.getArray(); - auto it = array.begin(); - - for (size_t index = 0; (index != nested.size()) && (it != array.end()); ++index) - { - if (nested[index]->insertResultToColumn(tuple.getColumn(index), *it++)) - were_valid_elements = true; - else - tuple.getColumn(index).insertDefault(); - } - - set_size(old_size + static_cast(were_valid_elements)); - return were_valid_elements; - } - - if (element.isObject()) - { - auto object = element.getObject(); - if (name_to_index_map.empty()) - { - auto it = object.begin(); - for (size_t index = 0; (index != nested.size()) && (it != object.end()); ++index) - { - if (nested[index]->insertResultToColumn(tuple.getColumn(index), (*it++).second)) - were_valid_elements = true; - else - tuple.getColumn(index).insertDefault(); - } - } - else - { - for (const auto & [key, value] : object) - { - auto index = name_to_index_map.find(key); - if (index != name_to_index_map.end()) - { - if (nested[index->second]->insertResultToColumn(tuple.getColumn(index->second), value)) - were_valid_elements = true; - } - } - } - - set_size(old_size + static_cast(were_valid_elements)); - return were_valid_elements; - } - - return false; - } - - private: - std::vector> nested; - std::vector explicit_names; - std::unordered_map name_to_index_map; - }; - - class MapNode : public Node - { - public: - MapNode(std::unique_ptr key_, std::unique_ptr value_) : key(std::move(key_)), value(std::move(value_)) { } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (!element.isObject()) - return false; - - ColumnMap & map_col = assert_cast(dest); - auto & offsets = map_col.getNestedColumn().getOffsets(); - auto & tuple_col = map_col.getNestedData(); - auto & key_col = tuple_col.getColumn(0); - auto & value_col = tuple_col.getColumn(1); - size_t old_size = tuple_col.size(); - - auto object = element.getObject(); - auto it = object.begin(); - for (; it != object.end(); ++it) - { - auto pair = *it; - - /// Insert key - key_col.insertData(pair.first.data(), pair.first.size()); - - /// Insert value - if (!value->insertResultToColumn(value_col, pair.second)) - value_col.insertDefault(); - } - - offsets.push_back(old_size + object.size()); - return true; - } - - private: - std::unique_ptr key; - std::unique_ptr value; - }; - - class VariantNode : public Node - { - public: - VariantNode(std::vector> variant_nodes_, std::vector order_) : variant_nodes(std::move(variant_nodes_)), order(std::move(order_)) { } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - auto & column_variant = assert_cast(dest); - for (size_t i : order) - { - auto & variant = column_variant.getVariantByGlobalDiscriminator(i); - if (variant_nodes[i]->insertResultToColumn(variant, element)) - { - column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); - column_variant.getOffsets().push_back(variant.size() - 1); - return true; - } - } - - return false; - } - - private: - std::vector> variant_nodes; - /// Order in which we should try variants nodes. - /// For example, String should be always the last one. - std::vector order; - }; - - static std::unique_ptr build(const char * function_name, const DataTypePtr & type) - { - switch (type->getTypeId()) - { - case TypeIndex::UInt8: return std::make_unique>(); - case TypeIndex::UInt16: return std::make_unique>(); - case TypeIndex::UInt32: return std::make_unique>(); - case TypeIndex::UInt64: return std::make_unique>(); - case TypeIndex::UInt128: return std::make_unique>(); - case TypeIndex::UInt256: return std::make_unique>(); - case TypeIndex::Int8: return std::make_unique>(); - case TypeIndex::Int16: return std::make_unique>(); - case TypeIndex::Int32: return std::make_unique>(); - case TypeIndex::Int64: return std::make_unique>(); - case TypeIndex::Int128: return std::make_unique>(); - case TypeIndex::Int256: return std::make_unique>(); - case TypeIndex::Float32: return std::make_unique>(); - case TypeIndex::Float64: return std::make_unique>(); - case TypeIndex::String: return std::make_unique(); - case TypeIndex::FixedString: return std::make_unique(); - case TypeIndex::UUID: return std::make_unique(); - case TypeIndex::LowCardinality: - { - // The low cardinality case is treated in two different ways: - // For FixedString type, an especial class is implemented for inserting the data in the destination column, - // as the string length must be passed in order to check and pad the incoming data. - // For the rest of low cardinality types, the insertion is done in their corresponding class, adapting the data - // as needed for the insertData function of the ColumnLowCardinality. - auto dictionary_type = typeid_cast(type.get())->getDictionaryType(); - if ((*dictionary_type).getTypeId() == TypeIndex::FixedString) - { - auto fixed_length = typeid_cast(dictionary_type.get())->getN(); - return std::make_unique(fixed_length); - } - return build(function_name, dictionary_type); - } - case TypeIndex::Decimal256: return std::make_unique>(type); - case TypeIndex::Decimal128: return std::make_unique>(type); - case TypeIndex::Decimal64: return std::make_unique>(type); - case TypeIndex::Decimal32: return std::make_unique>(type); - case TypeIndex::Enum8: - return std::make_unique>(static_cast(*type).getValues()); - case TypeIndex::Enum16: - return std::make_unique>(static_cast(*type).getValues()); - case TypeIndex::Nullable: - { - return std::make_unique(build(function_name, static_cast(*type).getNestedType())); - } - case TypeIndex::Array: - { - return std::make_unique(build(function_name, static_cast(*type).getNestedType())); - } - case TypeIndex::Tuple: - { - const auto & tuple = static_cast(*type); - const auto & tuple_elements = tuple.getElements(); - std::vector> elements; - elements.reserve(tuple_elements.size()); - for (const auto & tuple_element : tuple_elements) - elements.emplace_back(build(function_name, tuple_element)); - return std::make_unique(std::move(elements), tuple.haveExplicitNames() ? tuple.getElementNames() : Strings{}); - } - case TypeIndex::Map: - { - const auto & map_type = static_cast(*type); - const auto & key_type = map_type.getKeyType(); - if (!isString(removeLowCardinality(key_type))) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Function {} doesn't support the return type schema: {} with key type not String", - String(function_name), - type->getName()); - - const auto & value_type = map_type.getValueType(); - return std::make_unique(build(function_name, key_type), build(function_name, value_type)); - } - case TypeIndex::Variant: - { - const auto & variant_type = static_cast(*type); - const auto & variants = variant_type.getVariants(); - std::vector> variant_nodes; - variant_nodes.reserve(variants.size()); - for (const auto & variant : variants) - variant_nodes.push_back(build(function_name, variant)); - return std::make_unique(std::move(variant_nodes), SerializationVariant::getVariantsDeserializeTextOrder(variants)); - } - default: - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Function {} doesn't support the return type schema: {}", - String(function_name), type->getName()); - } - } -}; - - -template -class JSONExtractImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) - { - if (arguments.size() < 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); - - const auto & col = arguments.back(); - const auto * col_type_const = typeid_cast(col.column.get()); - if (!col_type_const || !isString(col.type)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "The last argument of function {} should " - "be a constant string specifying the return data type, illegal value: {}", - String(function_name), col.name); - - return DataTypeFactory::instance().get(col_type_const->getValue()); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } - - void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) - { - extract_tree = JSONExtractTree::build(function_name, result_type); - } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - return extract_tree->insertResultToColumn(dest, element); - } - -protected: - std::unique_ptr::Node> extract_tree; -}; - - -template -class JSONExtractKeysAndValuesImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) - { - if (arguments.size() < 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); - - const auto & col = arguments.back(); - const auto * col_type_const = typeid_cast(col.column.get()); - if (!col_type_const || !isString(col.type)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "The last argument of function {} should " - "be a constant string specifying the values' data type, illegal value: {}", - String(function_name), col.name); - - DataTypePtr key_type = std::make_unique(); - DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue()); - DataTypePtr tuple_type = std::make_unique(DataTypes{key_type, value_type}); - return std::make_unique(tuple_type); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } - - void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) - { - const auto tuple_type = typeid_cast(result_type.get())->getNestedType(); - const auto value_type = typeid_cast(tuple_type.get())->getElements()[1]; - extract_tree = JSONExtractTree::build(function_name, value_type); - } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isObject()) - return false; - - auto object = element.getObject(); - - auto & col_arr = assert_cast(dest); - auto & col_tuple = assert_cast(col_arr.getData()); - size_t old_size = col_tuple.size(); - auto & col_key = assert_cast(col_tuple.getColumn(0)); - auto & col_value = col_tuple.getColumn(1); - - for (const auto & [key, value] : object) - { - if (extract_tree->insertResultToColumn(col_value, value)) - col_key.insertData(key.data(), key.size()); - } - - if (col_tuple.size() == old_size) - return false; - - col_arr.getOffsets().push_back(col_tuple.size()); - return true; - } - -private: - std::unique_ptr::Node> extract_tree; -}; - - -template -class JSONExtractRawImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnString::Chars chars; - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); - } - else - { - ColumnString & col_str = assert_cast(dest); - auto & chars = col_str.getChars(); - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - chars.push_back(0); - col_str.getOffsets().push_back(chars.size()); - } - return true; - } - - // We use insertResultToFixedStringColumn in case we are inserting raw data in a FixedString column - static bool insertResultToFixedStringColumn(IColumn & dest, const Element & element, std::string_view) - { - ColumnFixedString::Chars chars; - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - - auto & col_str = assert_cast(dest); - - if (chars.size() > col_str.getN()) - return false; - - chars.resize_fill(col_str.getN()); - col_str.insertData(reinterpret_cast(chars.data()), chars.size()); - - - return true; - } - - // We use insertResultToLowCardinalityFixedStringColumn in case we are inserting raw data in a Low Cardinality FixedString column - static bool insertResultToLowCardinalityFixedStringColumn(IColumn & dest, const Element & element, size_t fixed_length) - { - if (element.getObject().size() > fixed_length) - return false; - - ColumnFixedString::Chars chars; - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - - if (chars.size() > fixed_length) - return false; - chars.resize_fill(fixed_length); - assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); - - return true; - } - -private: - static void traverse(const Element & element, WriteBuffer & buf) - { - if (element.isInt64()) - { - writeIntText(element.getInt64(), buf); - return; - } - if (element.isUInt64()) - { - writeIntText(element.getUInt64(), buf); - return; - } - if (element.isDouble()) - { - writeFloatText(element.getDouble(), buf); - return; - } - if (element.isBool()) - { - if (element.getBool()) - writeCString("true", buf); - else - writeCString("false", buf); - return; - } - if (element.isString()) - { - writeJSONString(element.getString(), buf, formatSettings()); - return; - } - if (element.isArray()) - { - writeChar('[', buf); - bool need_comma = false; - for (auto value : element.getArray()) - { - if (std::exchange(need_comma, true)) - writeChar(',', buf); - traverse(value, buf); - } - writeChar(']', buf); - return; - } - if (element.isObject()) - { - writeChar('{', buf); - bool need_comma = false; - for (auto [key, value] : element.getObject()) - { - if (std::exchange(need_comma, true)) - writeChar(',', buf); - writeJSONString(key, buf, formatSettings()); - writeChar(':', buf); - traverse(value, buf); - } - writeChar('}', buf); - return; - } - if (element.isNull()) - { - writeCString("null", buf); - return; - } - } - - static const FormatSettings & formatSettings() - { - static const FormatSettings the_instance = [] - { - FormatSettings settings; - settings.json.escape_forward_slashes = false; - return settings; - }(); - return the_instance; - } -}; - - -template -class JSONExtractArrayRawImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(std::make_shared()); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isArray()) - return false; - - auto array = element.getArray(); - ColumnArray & col_res = assert_cast(dest); - - for (auto value : array) - JSONExtractRawImpl::insertResultToColumn(col_res.getData(), value, {}); - - col_res.getOffsets().push_back(col_res.getOffsets().back() + array.size()); - return true; - } -}; - - -template -class JSONExtractKeysAndValuesRawImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - DataTypePtr string_type = std::make_unique(); - DataTypePtr tuple_type = std::make_unique(DataTypes{string_type, string_type}); - return std::make_unique(tuple_type); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isObject()) - return false; - - auto object = element.getObject(); - - auto & col_arr = assert_cast(dest); - auto & col_tuple = assert_cast(col_arr.getData()); - auto & col_key = assert_cast(col_tuple.getColumn(0)); - auto & col_value = assert_cast(col_tuple.getColumn(1)); - - for (const auto & [key, value] : object) - { - col_key.insertData(key.data(), key.size()); - JSONExtractRawImpl::insertResultToColumn(col_value, value, {}); - } - - col_arr.getOffsets().push_back(col_arr.getOffsets().back() + object.size()); - return true; - } -}; - -template -class JSONExtractKeysImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_unique(std::make_shared()); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isObject()) - return false; - - auto object = element.getObject(); - - ColumnArray & col_res = assert_cast(dest); - auto & col_key = assert_cast(col_res.getData()); - - for (const auto & [key, value] : object) - { - col_key.insertData(key.data(), key.size()); - } - - col_res.getOffsets().push_back(col_res.getOffsets().back() + object.size()); - return true; - } -}; - -} diff --git a/src/Functions/FunctionsMultiStringFuzzySearch.h b/src/Functions/FunctionsMultiStringFuzzySearch.h index 18b411e9839..a92a6570279 100644 --- a/src/Functions/FunctionsMultiStringFuzzySearch.h +++ b/src/Functions/FunctionsMultiStringFuzzySearch.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Functions/FunctionsMultiStringSearch.h b/src/Functions/FunctionsMultiStringSearch.h index c0ed90aa042..03db2651fd0 100644 --- a/src/Functions/FunctionsMultiStringSearch.h +++ b/src/Functions/FunctionsMultiStringSearch.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 08e257de8ac..6b65a5feaec 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -518,66 +518,78 @@ struct Dispatcher template static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr) { - const auto & value_col_typed = checkAndGetColumn>(*value_col); - auto col_res = ColumnVector::create(); - - typename ColumnVector::Container & vec_res = col_res->getData(); - vec_res.resize(value_col_typed.getData().size()); - - if (!vec_res.empty()) + // Non-const value argument: + const auto * value_col_typed = checkAndGetColumn>(value_col); + if (value_col_typed) { - if (scale_col == nullptr || isColumnConst(*scale_col)) - { - auto scale_arg = (scale_col == nullptr) ? 0 : getScaleArg(checkAndGetColumnConst>(scale_col)); - if (scale_arg == 0) - { - size_t scale = 1; - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); - } - else if (scale_arg > 0) - { - size_t scale = intExp10(scale_arg); - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); - } - else - { - size_t scale = intExp10(-scale_arg); - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); - } - } - /// Non-const scale argument: - else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) - { - const auto & value_data = value_col_typed.getData(); - const auto & scale_data = scale_col_typed->getData(); - const size_t rows = value_data.size(); + auto col_res = ColumnVector::create(); - for (size_t i = 0; i < rows; ++i) - { - Int64 scale64 = scale_data[i]; - validateScale(scale64); - Scale raw_scale = scale64; + typename ColumnVector::Container & vec_res = col_res->getData(); + vec_res.resize(value_col_typed->getData().size()); - if (raw_scale == 0) + if (!vec_res.empty()) + { + // Const scale argument: + if (scale_col == nullptr || isColumnConst(*scale_col)) + { + auto scale_arg = (scale_col == nullptr) ? 0 : getScaleArg(checkAndGetColumnConst>(scale_col)); + if (scale_arg == 0) { size_t scale = 1; - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); } - else if (raw_scale > 0) + else if (scale_arg > 0) { - size_t scale = intExp10(raw_scale); - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + size_t scale = intExp10(scale_arg); + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); } else { - size_t scale = intExp10(-raw_scale); - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + size_t scale = intExp10(-scale_arg); + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); + } + } + /// Non-const scale argument: + else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) + { + const auto & value_data = value_col_typed->getData(); + const auto & scale_data = scale_col_typed->getData(); + const size_t rows = value_data.size(); + + for (size_t i = 0; i < rows; ++i) + { + Int64 scale64 = scale_data[i]; + validateScale(scale64); + Scale raw_scale = scale64; + + if (raw_scale == 0) + { + size_t scale = 1; + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + else if (raw_scale > 0) + { + size_t scale = intExp10(raw_scale); + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + else + { + size_t scale = intExp10(-raw_scale); + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } } } } + return col_res; } - - return col_res; + // Const value argument: + const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); + if (value_col_typed_const) + { + auto value_col_full = value_col_typed_const->convertToFullColumn(); + return apply(value_col_full.get(), scale_col); + } + return nullptr; } }; @@ -589,38 +601,52 @@ public: template static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr) { - const auto & value_col_typed = checkAndGetColumn>(*value_col); - const typename ColumnDecimal::Container & vec_src = value_col_typed.getData(); - - auto col_res = ColumnDecimal::create(vec_src.size(), value_col_typed.getScale()); - auto & vec_res = col_res->getData(); - - if (!vec_res.empty()) + // Non-const value argument: + const auto * value_col_typed = checkAndGetColumn>(value_col); + if (value_col_typed) { - if (scale_col == nullptr || isColumnConst(*scale_col)) - { - auto scale_arg = scale_col == nullptr ? 0 : getScaleArg(checkAndGetColumnConst>(scale_col)); - DecimalRoundingImpl::apply(value_col_typed.getData(), value_col_typed.getScale(), vec_res, scale_arg); - } - /// Non-const scale argument - else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) - { - const auto & scale = scale_col_typed->getData(); - const size_t rows = vec_src.size(); + const typename ColumnDecimal::Container & vec_src = value_col_typed->getData(); - for (size_t i = 0; i < rows; ++i) + auto col_res = ColumnDecimal::create(vec_src.size(), value_col_typed->getScale()); + auto & vec_res = col_res->getData(); + vec_res.resize(vec_src.size()); + + if (!vec_res.empty()) + { + /// Const scale argument: + if (scale_col == nullptr || isColumnConst(*scale_col)) { - Int64 scale64 = scale[i]; - validateScale(scale64); - Scale raw_scale = scale64; + auto scale_arg = scale_col == nullptr ? 0 : getScaleArg(checkAndGetColumnConst>(scale_col)); + DecimalRoundingImpl::apply(vec_src, value_col_typed->getScale(), vec_res, scale_arg); + } + /// Non-const scale argument: + else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) + { + const auto & scale = scale_col_typed->getData(); + const size_t rows = vec_src.size(); - DecimalRoundingImpl::applyOne(value_col_typed.getElement(i), value_col_typed.getScale(), - reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); + for (size_t i = 0; i < rows; ++i) + { + Int64 scale64 = scale[i]; + validateScale(scale64); + Scale raw_scale = scale64; + + DecimalRoundingImpl::applyOne(value_col_typed->getElement(i), value_col_typed->getScale(), + reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); + } } } - } - return col_res; + return col_res; + } + // Const value argument: + const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); + if (value_col_typed_const) + { + auto value_col_full = value_col_typed_const->convertToFullColumn(); + return apply(value_col_full.get(), scale_col); + } + return nullptr; } }; @@ -647,7 +673,7 @@ public: FunctionArgumentDescriptors optional_args{ {"N", static_cast(&isNativeInteger), nullptr, "The number of decimal places to round to"}, }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return arguments[0].type; } @@ -671,9 +697,6 @@ public: using ScaleTypes = std::decay_t; using ScaleType = typename ScaleTypes::RightType; - if (isColumnConst(*value_arg.column) && !isColumnConst(*scale_column.column)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale column must be const for const data column"); - res = Dispatcher::template apply(value_arg.column.get(), scale_column.column.get()); return true; }; diff --git a/src/Functions/FunctionsStringSearch.h b/src/Functions/FunctionsStringSearch.h index 924a66c1130..fba6336ebff 100644 --- a/src/Functions/FunctionsStringSearch.h +++ b/src/Functions/FunctionsStringSearch.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/FunctionsTextClassification.h b/src/Functions/FunctionsTextClassification.h index 8e0f236366d..90e8af06ccc 100644 --- a/src/Functions/FunctionsTextClassification.h +++ b/src/Functions/FunctionsTextClassification.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/src/Functions/JSONArrayLength.cpp b/src/Functions/JSONArrayLength.cpp index 84e87061398..73dd55f1266 100644 --- a/src/Functions/JSONArrayLength.cpp +++ b/src/Functions/JSONArrayLength.cpp @@ -48,7 +48,7 @@ namespace {"json", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index ef2c788bf43..48daf13ba68 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -4,6 +4,7 @@ #include #include +#include #include #include diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index b6bd463212f..b317d786fab 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -23,7 +23,11 @@ namespace ProfileEvents { -extern const Event RegexpCreated; + extern const Event RegexpWithMultipleNeedlesCreated; + extern const Event RegexpWithMultipleNeedlesGlobalCacheHit; + extern const Event RegexpWithMultipleNeedlesGlobalCacheMiss; + extern const Event RegexpLocalCacheHit; + extern const Event RegexpLocalCacheMiss; } @@ -72,18 +76,28 @@ public: Bucket & bucket = known_regexps[hasher(pattern) % CACHE_SIZE]; if (bucket.regexp == nullptr) [[unlikely]] + { /// insert new entry + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); bucket = {pattern, std::make_shared(createRegexp(pattern))}; + } else + { if (pattern != bucket.pattern) + { /// replace existing entry + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); bucket = {pattern, std::make_shared(createRegexp(pattern))}; + } + else + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheHit); + } return bucket.regexp; } private: - constexpr static size_t CACHE_SIZE = 100; /// collision probability + constexpr static size_t CACHE_SIZE = 1'000; /// collision probability std::hash hasher; struct Bucket @@ -244,7 +258,7 @@ inline Regexps constructRegexps(const std::vector & str_patterns, [[mayb throw Exception(ErrorCodes::BAD_ARGUMENTS, "Pattern '{}' failed with error '{}'", str_patterns[error->expression], String(error->message)); } - ProfileEvents::increment(ProfileEvents::RegexpCreated); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesCreated); /// We allocate the scratch space only once, then copy it across multiple threads with hs_clone_scratch /// function which is faster than allocating scratch space each time in each thread. @@ -322,9 +336,11 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } else + { if (bucket.patterns != str_patterns || bucket.edit_distance != edit_distance) { /// replace existing entry @@ -333,8 +349,12 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } + else + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheHit); + } return bucket.regexps; } diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 24a40c45c6e..f5fb08f71d2 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -1,9 +1,12 @@ #pragma once -#include #include +#include #include +#include +#include #include +#include namespace DB { @@ -48,45 +51,75 @@ struct ReplaceRegexpImpl static constexpr int max_captures = 10; - static Instructions createInstructions(std::string_view replacement, int num_captures) + /// The replacement string references must not contain non-existing capturing groups. + static void checkSubstitutions(std::string_view replacement, int num_captures) { - Instructions instructions; - - String literals; for (size_t i = 0; i < replacement.size(); ++i) { if (replacement[i] == '\\' && i + 1 < replacement.size()) { - if (isNumericASCII(replacement[i + 1])) /// Substitution + if (isNumericASCII(replacement[i + 1])) /// substitution + { + int substitution_num = replacement[i + 1] - '0'; + if (substitution_num >= num_captures) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Substitution '\\{}' in replacement argument is invalid, regexp has only {} capturing groups", substitution_num, num_captures - 1); + } + } + } + } + + static Instructions createInstructions(std::string_view replacement, int num_captures) + { + checkSubstitutions(replacement, num_captures); + + Instructions instructions; + + String literals; + literals.reserve(replacement.size()); + + for (size_t i = 0; i < replacement.size(); ++i) + { + if (replacement[i] == '\\' && i + 1 < replacement.size()) + { + if (isNumericASCII(replacement[i + 1])) /// substitution { if (!literals.empty()) { instructions.emplace_back(literals); literals = ""; } - instructions.emplace_back(replacement[i + 1] - '0'); + int substitution_num = replacement[i + 1] - '0'; + instructions.emplace_back(substitution_num); } else - literals += replacement[i + 1]; /// Escaping + literals += replacement[i + 1]; /// escaping ++i; } else - literals += replacement[i]; /// Plain character + literals += replacement[i]; /// plain character } if (!literals.empty()) instructions.emplace_back(literals); - for (const auto & instr : instructions) - if (instr.substitution_num >= num_captures) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Id {} in replacement string is an invalid substitution, regexp has only {} capturing groups", - instr.substitution_num, num_captures - 1); - return instructions; } + static bool canFallbackToStringReplacement(const String & needle, const String & replacement, const re2::RE2 & searcher, int num_captures) + { + if (searcher.NumberOfCapturingGroups()) + return false; + + checkSubstitutions(replacement, num_captures); + + String required_substring; + bool is_trivial; + bool required_substring_is_prefix; + std::vector alternatives; + OptimizedRegularExpression::analyze(needle, required_substring, is_trivial, required_substring_is_prefix, alternatives); + return is_trivial && required_substring_is_prefix && required_substring == needle; + } + static void processString( const char * haystack_data, size_t haystack_length, @@ -124,7 +157,7 @@ struct ReplaceRegexpImpl { std::string_view replacement; if (instr.substitution_num >= 0) - replacement = std::string_view(matches[instr.substitution_num].data(), matches[instr.substitution_num].size()); + replacement = {matches[instr.substitution_num].data(), matches[instr.substitution_num].size()}; else replacement = instr.literal; res_data.resize(res_data.size() + replacement.size()); @@ -179,19 +212,32 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error messages to stderr. - regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr re2::RE2 searcher(needle, regexp_options); - if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); + /// Try to use non-regexp string replacement. This shortcut is implemented only for const-needles + const-replacement as + /// pattern analysis incurs some cost too. + if (canFallbackToStringReplacement(needle, replacement, searcher, num_captures)) + { + auto convertTrait = [](ReplaceRegexpTraits::Replace first_or_all) + { + switch (first_or_all) + { + case ReplaceRegexpTraits::Replace::First: return ReplaceStringTraits::Replace::First; + case ReplaceRegexpTraits::Replace::All: return ReplaceStringTraits::Replace::All; + } + }; + ReplaceStringImpl::vectorConstantConstant(haystack_data, haystack_offsets, needle, replacement, res_data, res_offsets); + return; + } + Instructions instructions = createInstructions(replacement, num_captures); - /// Cannot perform search for whole columns. Will process each string separately. for (size_t i = 0; i < haystack_size; ++i) { size_t from = i > 0 ? haystack_offsets[i - 1] : 0; @@ -221,10 +267,8 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error messages to stderr. - regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr - /// Cannot perform search for whole columns. Will process each string separately. for (size_t i = 0; i < haystack_size; ++i) { size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; @@ -242,6 +286,7 @@ struct ReplaceRegexpImpl re2::RE2 searcher(needle, regexp_options); if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); + int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); Instructions instructions = createInstructions(replacement, num_captures); @@ -270,17 +315,14 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error messages to stderr. - regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr re2::RE2 searcher(needle, regexp_options); - if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); - /// Cannot perform search for whole columns. Will process each string separately. for (size_t i = 0; i < haystack_size; ++i) { size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; @@ -290,8 +332,9 @@ struct ReplaceRegexpImpl size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0; const char * repl_data = reinterpret_cast(replacement_data.data() + repl_from); const size_t repl_length = static_cast(replacement_offsets[i] - repl_from - 1); + std::string_view replacement(repl_data, repl_length); - Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures); + Instructions instructions = createInstructions(replacement, num_captures); processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions); res_offsets[i] = res_offset; @@ -317,10 +360,8 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error messages to stderr. - regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr - /// Cannot perform search for whole columns. Will process each string separately. for (size_t i = 0; i < haystack_size; ++i) { size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; @@ -338,12 +379,14 @@ struct ReplaceRegexpImpl size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0; const char * repl_data = reinterpret_cast(replacement_data.data() + repl_from); const size_t repl_length = static_cast(replacement_offsets[i] - repl_from - 1); + std::string_view replacement(repl_data, repl_length); re2::RE2 searcher(needle, regexp_options); if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); + int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); - Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures); + Instructions instructions = createInstructions(replacement, num_captures); processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions); res_offsets[i] = res_offset; @@ -367,16 +410,13 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error messages to stderr. - regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr re2::RE2 searcher(needle, regexp_options); - if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); - Instructions instructions = createInstructions(replacement, num_captures); for (size_t i = 0; i < haystack_size; ++i) diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp index c08f41f06ee..0f565df8172 100644 --- a/src/Functions/URL/URLHierarchy.cpp +++ b/src/Functions/URL/URLHierarchy.cpp @@ -32,7 +32,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp index 7c796116b8d..2cb5995e375 100644 --- a/src/Functions/URL/URLPathHierarchy.cpp +++ b/src/Functions/URL/URLPathHierarchy.cpp @@ -30,7 +30,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/extractURLParameterNames.cpp b/src/Functions/URL/extractURLParameterNames.cpp index 16ace36d39b..b3d51d02162 100644 --- a/src/Functions/URL/extractURLParameterNames.cpp +++ b/src/Functions/URL/extractURLParameterNames.cpp @@ -30,7 +30,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp index 43079834872..ce2aadaeede 100644 --- a/src/Functions/URL/extractURLParameters.cpp +++ b/src/Functions/URL/extractURLParameters.cpp @@ -31,7 +31,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {} diff --git a/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp index 2c031158c48..a20e1807487 100644 --- a/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp +++ b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp @@ -2,6 +2,7 @@ #include #include +#include #include @@ -184,7 +185,7 @@ ExternalLoader::LoadableMutablePtr ExternalUserDefinedExecutableFunctionsLoader: pool_size = config.getUInt64(key_in_config + ".pool_size", 16); max_command_execution_time = config.getUInt64(key_in_config + ".max_command_execution_time", 10); - size_t max_execution_time_seconds = static_cast(getContext()->getSettings().max_execution_time.totalSeconds()); + size_t max_execution_time_seconds = static_cast(getContext()->getSettingsRef().max_execution_time.totalSeconds()); if (max_execution_time_seconds != 0 && max_command_execution_time > max_execution_time_seconds) max_command_execution_time = max_execution_time_seconds; } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp index c15958f81c9..da79e17f18a 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp index b406cc8d317..4c004d2537c 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp @@ -9,6 +9,8 @@ #include #include +#include + #include #include #include diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp index 766d63eafb0..01e7e3995fa 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp @@ -14,7 +14,7 @@ #include #include #include - +#include namespace DB { diff --git a/src/Functions/array/FunctionsMapMiscellaneous.cpp b/src/Functions/array/FunctionsMapMiscellaneous.cpp index 76c1ec18171..c3586a57161 100644 --- a/src/Functions/array/FunctionsMapMiscellaneous.cpp +++ b/src/Functions/array/FunctionsMapMiscellaneous.cpp @@ -51,6 +51,8 @@ public: bool isVariadic() const override { return impl.isVariadic(); } size_t getNumberOfArguments() const override { return impl.getNumberOfArguments(); } + bool useDefaultImplementationForNulls() const override { return impl.useDefaultImplementationForNulls(); } + bool useDefaultImplementationForLowCardinalityColumns() const override { return impl.useDefaultImplementationForLowCardinalityColumns(); } bool useDefaultImplementationForConstants() const override { return impl.useDefaultImplementationForConstants(); } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return false; } @@ -184,7 +186,7 @@ struct MapToNestedAdapter : public MapAdapterBase struct MapToSubcolumnAdapter { - static_assert(position <= 1); + static_assert(position <= 1, "position of Map subcolumn must be 0 or 1"); static void extractNestedTypes(DataTypes & types) { @@ -357,7 +359,7 @@ struct NameMapValues { static constexpr auto name = "mapValues"; }; using FunctionMapValues = FunctionMapToArrayAdapter, NameMapValues>; struct NameMapContains { static constexpr auto name = "mapContains"; }; -using FunctionMapContains = FunctionMapToArrayAdapter, MapToSubcolumnAdapter, NameMapContains>; +using FunctionMapContains = FunctionMapToArrayAdapter, MapToSubcolumnAdapter, NameMapContains>; struct NameMapFilter { static constexpr auto name = "mapFilter"; }; using FunctionMapFilter = FunctionMapToArrayAdapter, NameMapFilter>; diff --git a/src/Functions/array/array.cpp b/src/Functions/array/array.cpp index 03b51808799..dfe589fb74f 100644 --- a/src/Functions/array/array.cpp +++ b/src/Functions/array/array.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index 499fe4ce7b2..3e2a3bf6863 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -103,27 +103,40 @@ private: sorted_labels[i].label = label; } - /// Stable sort is required for for labels to apply in same order if score is equal - std::stable_sort(sorted_labels.begin(), sorted_labels.end(), [](const auto & lhs, const auto & rhs) { return lhs.score > rhs.score; }); + /// Sorting scores in descending order to traverse the ROC curve from left to right + std::sort(sorted_labels.begin(), sorted_labels.end(), [](const auto & lhs, const auto & rhs) { return lhs.score > rhs.score; }); /// We will first calculate non-normalized area. - size_t area = 0; - size_t count_positive = 0; + Float64 area = 0.0; + Float64 prev_score = sorted_labels[0].score; + size_t prev_fp = 0, prev_tp = 0; + size_t curr_fp = 0, curr_tp = 0; for (size_t i = 0; i < size; ++i) { + // Only increment the area when the score changes + if (sorted_labels[i].score != prev_score) + { + area += (curr_fp - prev_fp) * (curr_tp + prev_tp) / 2.0; // Trapezoidal area under curve (might degenerate to zero or to a rectangle) + prev_fp = curr_fp; + prev_tp = curr_tp; + prev_score = sorted_labels[i].score; + } + if (sorted_labels[i].label) - ++count_positive; /// The curve moves one step up. No area increase. + curr_tp += 1; /// The curve moves one step up. else - area += count_positive; /// The curve moves one step right. Area is increased by 1 * height = count_positive. + curr_fp += 1; /// The curve moves one step right. } - /// Then divide the area to the area of rectangle. + area += (curr_fp - prev_fp) * (curr_tp + prev_tp) / 2.0; - if (count_positive == 0 || count_positive == size) + /// Then normalize it dividing by the area to the area of rectangle. + + if (curr_tp == 0 || curr_tp == size) return std::numeric_limits::quiet_NaN(); - return static_cast(area) / count_positive / (size - count_positive); + return area / curr_tp / (size - curr_tp); } static void vector( diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index fa9b3dc92dd..0782f109187 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -13,6 +14,8 @@ #include #include #include +#include "Common/Logger.h" +#include "Common/logger_useful.h" #include #include #include @@ -28,6 +31,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; } using NullMap = PaddedPODArray; @@ -424,31 +428,21 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override { - if constexpr (std::is_same_v) + if (auto res = executeMap(arguments, result_type)) + return res; + + if (auto res = executeArrayLowCardinality(arguments)) + return res; + + auto new_arguments = arguments; + + for (auto & argument : new_arguments) { - if (isMap(arguments[0].type)) - { - auto non_const_map_column = arguments[0].column->convertToFullColumnIfConst(); - - const auto & map_column = assert_cast(*non_const_map_column); - const auto & map_array_column = map_column.getNestedColumn(); - auto offsets = map_array_column.getOffsetsPtr(); - auto keys = map_column.getNestedData().getColumnPtr(0); - auto array_column = ColumnArray::create(keys, offsets); - - const auto & type_map = assert_cast(*arguments[0].type); - auto array_type = std::make_shared(type_map.getKeyType()); - - auto arguments_copy = arguments; - arguments_copy[0].column = std::move(array_column); - arguments_copy[0].type = std::move(array_type); - arguments_copy[0].name = arguments[0].name; - - return executeArrayImpl(arguments_copy, result_type); - } + argument.column = recursiveRemoveLowCardinality(argument.column); + argument.type = recursiveRemoveLowCardinality(argument.type); } - return executeArrayImpl(arguments, result_type); + return executeArrayImpl(new_arguments, result_type); } private: @@ -458,18 +452,6 @@ private: using NullMaps = std::pair; - struct ExecutionData - { - const IColumn& left; - const IColumn& right; - const ColumnArray::Offsets& offsets; - ColumnPtr result_column; - NullMaps maps; - ResultColumnPtr result { ResultColumnType::create() }; - - void moveResult() { result_column = std::move(result); } - }; - static bool allowArguments(const DataTypePtr & inner_type, const DataTypePtr & arg) { auto inner_type_decayed = removeNullable(removeLowCardinality(inner_type)); @@ -574,23 +556,13 @@ private: } } -#define INTEGRAL_TPL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64 +#define INTEGRAL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64 ColumnPtr executeOnNonNullable(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const { - if (const auto* const left_arr = checkAndGetColumn(arguments[0].column.get())) - { - if (checkAndGetColumn(&left_arr->getData())) - { - if (auto res = executeLowCardinality(arguments)) - return res; - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal internal type of first argument of function {}", getName()); - } - } - ColumnPtr res; - if (!((res = executeIntegral(arguments)) + if (!((res = executeNothing(arguments)) + || (res = executeIntegral(arguments)) || (res = executeConst(arguments, result_type)) || (res = executeString(arguments)) || (res = executeGeneric(arguments)))) @@ -599,6 +571,8 @@ private: return res; } +#undef INTEGRAL_PACK + /** * The Array's internal data type may be quite tricky (containing a Nullable type somewhere). To process the * Nullable types correctly, for each data type specialisation we provide two null maps (one for the data and one @@ -627,6 +601,14 @@ private: return {null_map_data, null_map_item}; } + struct ExecutionData + { + const IColumn & left; + const IColumn & right; + const ColumnArray::Offsets & offsets; + NullMaps null_maps; + }; + /** * Given a variadic pack #Integral, apply executeIntegralExpanded with such parameters: * Integral s = {s1, s2, ...} @@ -635,39 +617,33 @@ private: template static ColumnPtr executeIntegral(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * const left = checkAndGetColumn(arguments[0].column.get()); - - if (!left) + const auto * array = checkAndGetColumn(arguments[0].column.get()); + if (!array) return nullptr; - const ColumnPtr right_converted_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn& right = *right_converted_ptr.get(); - - ExecutionData data = { - left->getData(), - right, - left->getOffsets(), - nullptr, - getNullMaps(arguments) + ExecutionData data + { + .left = array->getData(), + .right = *arguments[1].column, + .offsets = array->getOffsets(), + .null_maps = getNullMaps(arguments), }; - if (executeIntegral(data)) - return data.result_column; - - return nullptr; + auto result = ResultColumnType::create(); + return executeIntegral(data, *result) ? std::move(result) : nullptr; } template - static bool executeIntegral(ExecutionData& data) + static bool executeIntegral(const ExecutionData & data, ResultColumnType & result) { - return (executeIntegralExpanded(data) || ...); + return (executeIntegralExpanded(data, result) || ...); } /// Invoke executeIntegralImpl with such parameters: (A, other1), (A, other2), ... template - static bool executeIntegralExpanded(ExecutionData& data) + static bool executeIntegralExpanded(const ExecutionData & data, ResultColumnType & result) { - return (executeIntegralImpl(data) || ...); + return (executeIntegralImpl(data, result) || ...); } /** @@ -676,40 +652,31 @@ private: * so we have to check all possible variants for #Initial and #Resulting types. */ template - static bool executeIntegralImpl(ExecutionData& data) + static bool executeIntegralImpl(const ExecutionData & data, ResultColumnType & result) { - const ColumnVector * col_nested = checkAndGetColumn>(&data.left); - - if (!col_nested) + const auto * left_typed = checkAndGetColumn>(&data.left); + if (!left_typed) return false; - const auto [null_map_data, null_map_item] = data.maps; - - if (data.right.onlyNull()) - Impl::Null::process( - data.offsets, - data.result->getData(), - null_map_data); - else if (const auto item_arg_const = checkAndGetColumnConst>(&data.right)) + if (const auto * item_arg_const = checkAndGetColumnConst>(&data.right)) Impl::Main::vector( - col_nested->getData(), + left_typed->getData(), data.offsets, item_arg_const->template getValue(), - data.result->getData(), - null_map_data, + result.getData(), + data.null_maps.first, nullptr); - else if (const auto item_arg_vector = checkAndGetColumn>(&data.right)) + else if (const auto * item_arg_vector = checkAndGetColumn>(&data.right)) Impl::Main::vector( - col_nested->getData(), + left_typed->getData(), data.offsets, item_arg_vector->getData(), - data.result->getData(), - null_map_data, - null_map_item); + result.getData(), + data.null_maps.first, + data.null_maps.second); else return false; - data.moveResult(); return true; } @@ -724,227 +691,161 @@ private: * * Tips and tricks tried can be found at https://github.com/ClickHouse/ClickHouse/pull/12550 . */ - static ColumnPtr executeLowCardinality(const ColumnsWithTypeAndName & arguments) + static ColumnPtr executeArrayLowCardinality(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * const col_array = checkAndGetColumn(arguments[0].column.get()); + const auto * col_array = checkAndGetColumn(arguments[0].column.get()); + const auto * col_array_const = checkAndGetColumnConstData(arguments[0].column.get()); - if (!col_array) + if (!col_array && !col_array_const) return nullptr; - const ColumnLowCardinality * const col_lc = checkAndGetColumn(&col_array->getData()); + if (col_array_const) + col_array = col_array_const; - if (!col_lc) + const auto * left_lc = checkAndGetColumn(&col_array->getData()); + if (!left_lc) return nullptr; - const auto [null_map_data, null_map_item] = getNullMaps(arguments); + const auto * right_const = checkAndGetColumn(arguments[1].column.get()); + if (!right_const) + return nullptr; - if (const ColumnConst * col_arg_const = checkAndGetColumn(&*arguments[1].column)) + const auto & array_type = assert_cast(*arguments[0].type); + const auto target_type = recursiveRemoveLowCardinality(array_type.getNestedType()); + auto right = recursiveRemoveLowCardinality(right_const->getDataColumnPtr()); + + UInt64 index = 0; + UInt64 left_size = arguments[0].column->size(); + ResultColumnPtr col_result = ResultColumnType::create(); + + if (!right->isNullAt(0)) { - const IColumnUnique & col_lc_dict = col_lc->getDictionary(); + auto right_type = recursiveRemoveLowCardinality(arguments[1].type); + right = castColumn({right, right_type, ""}, target_type); - const DataTypeArray * const array_type = checkAndGetDataType(arguments[0].type.get()); - const DataTypePtr target_type_ptr = recursiveRemoveLowCardinality(array_type->getNestedType()); + if (right->isNullable()) + right = checkAndGetColumn(*right).getNestedColumnPtr(); - ColumnPtr col_arg_cloned = castColumn( - {col_arg_const->getDataColumnPtr(), arguments[1].type, arguments[1].name}, target_type_ptr); + StringRef elem = right->getDataAt(0); + const auto & left_dict = left_lc->getDictionary(); - ResultColumnPtr col_result = ResultColumnType::create(); - UInt64 index = 0; - - if (!col_arg_cloned->isNullAt(0)) + if (std::optional maybe_index = left_dict.getOrFindValueIndex(elem); maybe_index) { - if (col_arg_cloned->isNullable()) - col_arg_cloned = checkAndGetColumn(*col_arg_cloned).getNestedColumnPtr(); - - StringRef elem = col_arg_cloned->getDataAt(0); - - if (std::optional maybe_index = col_lc_dict.getOrFindValueIndex(elem); maybe_index) - { - index = *maybe_index; - } - else - { - const size_t offsets_size = col_array->getOffsets().size(); - auto & data = col_result->getData(); - - data.resize_fill(offsets_size); - - return col_result; - } + index = *maybe_index; } - - Impl::Main::vector( - col_lc->getIndexes(), - col_array->getOffsets(), - index, /** Assuming LowCardinality has index of NULL always as zero. */ - col_result->getData(), - null_map_data, - null_map_item); - - return col_result; - } - else if (col_lc->nestedIsNullable()) // LowCardinality(Nullable(T)) and U - { - const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); // Nullable(T) - const ColumnNullable & left_nullable = checkAndGetColumn(*left_casted); - - const NullMap * const null_map_left_casted = &left_nullable.getNullMapColumn().getData(); - - const IColumn & left_ptr = left_nullable.getNestedColumn(); - - const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality(); - const ColumnNullable * const right_nullable = checkAndGetColumn(right_casted.get()); - - const NullMap * const null_map_right_casted = right_nullable - ? &right_nullable->getNullMapColumn().getData() - : null_map_item; - - const IColumn& right_ptr = right_nullable - ? right_nullable->getNestedColumn() - : *right_casted.get(); - - ExecutionData data = + else { - left_ptr, right_ptr, - col_array->getOffsets(), - nullptr, - {null_map_left_casted, null_map_right_casted}}; + col_result->getData().resize_fill(col_array->size()); - if (dispatchConvertedLowCardinalityColumns(data)) - return data.result_column; + if (col_array_const) + return ColumnConst::create(std::move(col_result), left_size); + + return col_result; + } } - else // LowCardinality(T) and U, T not Nullable - { - if (arguments[1].column->isNullable()) - return nullptr; - - if (const auto* const arg_lc = checkAndGetColumn(arguments[1].column.get()); - arg_lc && arg_lc->isNullable()) - return nullptr; - - // LowCardinality(T) and U (possibly LowCardinality(V)) - - const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); - const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality(); - - ExecutionData data = - { - *left_casted.get(), *right_casted.get(), col_array->getOffsets(), - nullptr, {null_map_data, null_map_item} - }; - - if (dispatchConvertedLowCardinalityColumns(data)) - return data.result_column; - } - - return nullptr; - } - - static bool dispatchConvertedLowCardinalityColumns(ExecutionData & data) - { - if (data.left.isNumeric() && data.right.isNumeric()) // ColumnArrays - return executeIntegral(data); - - if (checkAndGetColumn(&data.left)) - return executeStringImpl(data); Impl::Main::vector( - data.left, - data.offsets, data.right, - data.result->getData(), - data.maps.first, data.maps.second); + left_lc->getIndexes(), + col_array->getOffsets(), + index, /** Assuming LowCardinality has index of NULL always as zero. */ + col_result->getData(), + nullptr, + nullptr); - data.moveResult(); - return true; + if (col_array_const) + return ColumnConst::create(std::move(col_result), left_size); + + return col_result; } -#undef INTEGRAL_TPL_PACK + ColumnPtr executeMap(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const + { + if constexpr (!std::is_same_v) + return nullptr; + + if (!isMap(arguments[0].type)) + return nullptr; + + auto non_const_map_column = arguments[0].column->convertToFullColumnIfConst(); + + const auto & map_column = assert_cast(*non_const_map_column); + const auto & map_array_column = map_column.getNestedColumn(); + auto offsets = map_array_column.getOffsetsPtr(); + auto keys = map_column.getNestedData().getColumnPtr(0); + auto array_column = ColumnArray::create(keys, offsets); + + const auto & type_map = assert_cast(*arguments[0].type); + auto array_type = std::make_shared(type_map.getKeyType()); + + auto arguments_copy = arguments; + arguments_copy[0].column = std::move(array_column); + arguments_copy[0].type = std::move(array_type); + arguments_copy[0].name = arguments[0].name; + + return executeArrayImpl(arguments_copy, result_type); + } static ColumnPtr executeString(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * array = checkAndGetColumn(arguments[0].column.get()); - + const auto * array = checkAndGetColumn(arguments[0].column.get()); if (!array) return nullptr; - const ColumnString * left = checkAndGetColumn(&array->getData()); - + const auto * left = checkAndGetColumn(&array->getData()); if (!left) return nullptr; - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn & right = *right_ptr.get(); + const auto & right = *arguments[1].column; + const auto [null_map_data, null_map_item] = getNullMaps(arguments); - ExecutionData data = { - *left, right, array->getOffsets(), - nullptr, getNullMaps(arguments), - std::move(ResultColumnType::create()) - }; + auto result = ResultColumnType::create(); - if (executeStringImpl(data)) - return data.result_column; - - return nullptr; - } - - static bool executeStringImpl(ExecutionData& data) - { - const auto [null_map_data, null_map_item] = data.maps; - const ColumnString& left = *typeid_cast(&data.left); - - if (data.right.onlyNull()) - Impl::Null::process( - data.offsets, - data.result->getData(), - null_map_data); - else if (const auto *const item_arg_const = checkAndGetColumnConstStringOrFixedString(&data.right)) + if (const auto * item_arg_const = checkAndGetColumnConstStringOrFixedString(&right)) { - const ColumnString * item_const_string = - checkAndGetColumn(&item_arg_const->getDataColumn()); - - const ColumnFixedString * item_const_fixedstring = - checkAndGetColumn(&item_arg_const->getDataColumn()); + const auto * item_const_string = checkAndGetColumn(&item_arg_const->getDataColumn()); + const auto * item_const_fixedstring = checkAndGetColumn(&item_arg_const->getDataColumn()); if (item_const_string) Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_const_string->getChars(), item_const_string->getDataAt(0).size, - data.result->getData(), + result->getData(), null_map_data, null_map_item); else if (item_const_fixedstring) Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_const_fixedstring->getChars(), item_const_fixedstring->getN(), - data.result->getData(), + result->getData(), null_map_data, null_map_item); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Logical error: ColumnConst contains not String nor FixedString column"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnConst contains not String nor FixedString column"); } - else if (const auto *const item_arg_vector = checkAndGetColumn(&data.right)) + else if (const auto * item_arg_vector = checkAndGetColumn(&right)) { Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_arg_vector->getChars(), item_arg_vector->getOffsets(), - data.result->getData(), + result->getData(), null_map_data, null_map_item); } else - return false; + { + return nullptr; + } - data.moveResult(); - return true; + return result; } static ColumnPtr executeConst(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) @@ -955,9 +856,7 @@ private: return nullptr; Array arr = col_array->getValue(); - - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn * item_arg = right_ptr.get(); + const IColumn * item_arg = arguments[1].column.get(); if (isColumnConst(*item_arg)) { @@ -1026,48 +925,59 @@ private: } } + static ColumnPtr executeNothing(const ColumnsWithTypeAndName & arguments) + { + const auto * array = checkAndGetColumn(arguments[0].column.get()); + if (!array) + return nullptr; + + if (arguments[1].column->onlyNull()) + { + auto result = ResultColumnType::create(); + Impl::Null::process(array->getOffsets(), result->getData(), getNullMaps(arguments).first); + return result; + } + + return nullptr; + } + static ColumnPtr executeGeneric(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * col = checkAndGetColumn(arguments[0].column.get()); - - if (!col) + const auto * col_array = checkAndGetColumn(arguments[0].column.get()); + if (!col_array) return nullptr; DataTypePtr array_elements_type = assert_cast(*arguments[0].type).getNestedType(); const DataTypePtr & index_type = arguments[1].type; - DataTypePtr common_type = getLeastSupertype(DataTypes{array_elements_type, index_type}); - - ColumnPtr col_nested = castColumn({ col->getDataPtr(), array_elements_type, "" }, common_type); - - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - ColumnPtr item_arg = castColumn({ right_ptr, removeLowCardinality(index_type), "" }, common_type); + DataTypePtr common_type = getLeastSupertype(DataTypes{array_elements_type, arguments[1].type}); + ColumnPtr col_nested = castColumn({ col_array->getDataPtr(), array_elements_type, "" }, common_type); + ColumnPtr item_arg = castColumn({ arguments[1].column, removeLowCardinality(index_type), "" }, common_type); auto col_res = ResultColumnType::create(); auto [null_map_data, null_map_item] = getNullMaps(arguments); - if (item_arg->onlyNull()) - Impl::Null::process( - col->getOffsets(), - col_res->getData(), - null_map_data); - else if (isColumnConst(*item_arg)) + if (const auto * item_arg_const = checkAndGetColumn(item_arg.get())) + { Impl::Main::vector( *col_nested, - col->getOffsets(), - typeid_cast(*item_arg).getDataColumn(), + col_array->getOffsets(), + item_arg_const->getDataColumn(), col_res->getData(), /// TODO This is wrong. null_map_data, nullptr); + } else + { Impl::Main::vector( *col_nested, - col->getOffsets(), + col_array->getOffsets(), *item_arg, col_res->getData(), null_map_data, null_map_item); + } return col_res; } diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index 87f3390ac73..7db20667888 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -87,7 +87,7 @@ public: {"array_1", static_cast(&isArray), nullptr, "Array"}, {"array_2", static_cast(&isArray), nullptr, "Array"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared>(); } diff --git a/src/Functions/array/arrayRandomSample.cpp b/src/Functions/array/arrayRandomSample.cpp index b08a73b93f3..6e176b6e33d 100644 --- a/src/Functions/array/arrayRandomSample.cpp +++ b/src/Functions/array/arrayRandomSample.cpp @@ -39,7 +39,7 @@ public: {"array", static_cast(&isArray), nullptr, "Array"}, {"samples", static_cast(&isUInt), isColumnConst, "const UInt*"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); // Return an array with the same nested type as the input array const DataTypePtr & array_type = arguments[0].type; diff --git a/src/Functions/array/arrayShingles.cpp b/src/Functions/array/arrayShingles.cpp index 8932482c69c..7c97d8136fb 100644 --- a/src/Functions/array/arrayShingles.cpp +++ b/src/Functions/array/arrayShingles.cpp @@ -31,7 +31,7 @@ public: {"array", static_cast(&isArray), nullptr, "Array"}, {"length", static_cast(&isInteger), nullptr, "Integer"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); const DataTypeArray * array_type = checkAndGetDataType(arguments[0].type.get()); return std::make_shared(std::make_shared(array_type->getNestedType())); diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index 1ac698a2745..b8b03ab5e74 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/arrayStringConcat.cpp b/src/Functions/arrayStringConcat.cpp index 421408c01f2..12bab410fec 100644 --- a/src/Functions/arrayStringConcat.cpp +++ b/src/Functions/arrayStringConcat.cpp @@ -159,7 +159,7 @@ public: {"separator", static_cast(&isString), isColumnConst, "const String"}, }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index c366a1ecb44..d561430d51f 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -5,6 +5,7 @@ namespace DB { namespace ErrorCodes { + extern const int ARGUMENT_OUT_OF_BOUND; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; } @@ -24,6 +25,8 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); + else if (b < 0 || static_cast(b) > 8 * sizeof(A)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) << static_cast(b); else @@ -37,9 +40,12 @@ struct BitShiftLeftImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; - /// To prevent overflow - if (static_cast(b) >= (static_cast(end - pos) * word_size) || b < 0) + const UInt8 word_size = 8 * sizeof(*pos); + size_t n = end - pos; + const UInt128 bit_limit = static_cast(word_size) * n; + if (b < 0 || static_cast(b) > bit_limit) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); + else if (b == bit_limit) { // insert default value out_vec.push_back(0); @@ -102,10 +108,12 @@ struct BitShiftLeftImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; + const UInt8 word_size = 8; size_t n = end - pos; - /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size) || b < 0) + const UInt128 bit_limit = static_cast(word_size) * n; + if (b < 0 || static_cast(b) > bit_limit) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); + else if (b == bit_limit) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 1c37cd3bf4c..05b8581c792 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -6,6 +6,7 @@ namespace DB { namespace ErrorCodes { + extern const int ARGUMENT_OUT_OF_BOUND; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; } @@ -25,6 +26,8 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); + else if (b < 0 || static_cast(b) > 8 * sizeof(A)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) >> static_cast(b); else @@ -53,9 +56,12 @@ struct BitShiftRightImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; - /// To prevent overflow - if (static_cast(b) >= (static_cast(end - pos) * word_size) || b < 0) + const UInt8 word_size = 8; + size_t n = end - pos; + const UInt128 bit_limit = static_cast(word_size) * n; + if (b < 0 || static_cast(b) > bit_limit) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); + else if (b == bit_limit) { /// insert default value out_vec.push_back(0); @@ -90,10 +96,12 @@ struct BitShiftRightImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; + const UInt8 word_size = 8; size_t n = end - pos; - /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size) || b < 0) + const UInt128 bit_limit = static_cast(word_size) * n; + if (b < 0 || static_cast(b) > bit_limit) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); + else if (b == bit_limit) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index 78ec9c8b773..cb6b83c1cf1 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; + extern const int PARAMETER_OUT_OF_BOUND; } namespace @@ -21,12 +22,21 @@ struct BitTestImpl static const constexpr bool allow_string_integer = false; template - NO_SANITIZE_UNDEFINED static Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument"); else - return (typename NumberTraits::ToInteger::Type(a) >> typename NumberTraits::ToInteger::Type(b)) & 1; + { + typename NumberTraits::ToInteger::Type a_int = a; + typename NumberTraits::ToInteger::Type b_int = b; + const auto max_position = static_cast((8 * sizeof(a)) - 1); + if (b_int > max_position || b_int < 0) + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument needs to a positive value and less or equal to {} for integer {}", + std::to_string(max_position), std::to_string(a_int)); + return (a_int >> b_int) & 1; + } } #if USE_EMBEDDED_COMPILER diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 44b39811882..1ece17ca1e3 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -203,7 +204,7 @@ private: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; + FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}}; FunctionArgumentDescriptors optional_args; if (isDecimal(type) || isDateTime64(type)) @@ -212,9 +213,9 @@ private: if (isDateTimeOrDateTime64(type)) optional_args.push_back({"timezone", static_cast(&isString), isColumnConst, "const String"}); - optional_args.push_back({"default_value", nullptr, nullptr, nullptr}); + optional_args.push_back({"default_value", nullptr, nullptr, "any type"}); - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); size_t additional_argument_index = 1; diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp new file mode 100644 index 00000000000..19e4c165ee3 --- /dev/null +++ b/src/Functions/changeDate.cpp @@ -0,0 +1,399 @@ +#include "Common/DateLUTImpl.h" +#include "Common/Exception.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace +{ + +enum class Component +{ + Year, + Month, + Day, + Hour, + Minute, + Second +}; + +} + +template +class FunctionChangeDate : public IFunction +{ +public: + static constexpr auto name = Traits::name; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return Traits::name; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {"date_or_datetime", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date or date with time"}, + {"value", static_cast(&isNativeInteger), nullptr, "Integer"} + }; + validateFunctionArguments(*this, arguments, args); + + const auto & input_type = arguments[0].type; + + if constexpr (Traits::component == Component::Hour || Traits::component == Component::Minute || Traits::component == Component::Second) + { + if (isDate(input_type)) + return std::make_shared(); + if (isDate32(input_type)) + return std::make_shared(DataTypeDateTime64::default_scale); + } + + return input_type; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + const auto & input_type = arguments[0].type; + if (isDate(input_type)) + { + if constexpr (Traits::component == Component::Hour || Traits::component == Component::Minute || Traits::component == Component::Second) + return execute(arguments, input_type, result_type, input_rows_count); + return execute(arguments, input_type, result_type, input_rows_count); + } + if (isDate32(input_type)) + { + if constexpr (Traits::component == Component::Hour || Traits::component == Component::Minute || Traits::component == Component::Second) + return execute(arguments, input_type, result_type, input_rows_count); + return execute(arguments, input_type, result_type, input_rows_count); + } + if (isDateTime(input_type)) + return execute(arguments, input_type, result_type, input_rows_count); + if (isDateTime64(input_type)) + return execute(arguments, input_type, result_type, input_rows_count); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid input type"); + } + + template + ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & input_type, const DataTypePtr & result_type, size_t input_rows_count) const + { + typename ResultDataType::ColumnType::MutablePtr result_col; + if constexpr (std::is_same_v) + { + auto scale = DataTypeDateTime64::default_scale; + if constexpr (std::is_same_v) + scale = typeid_cast(*result_type).getScale(); + result_col = ResultDataType::ColumnType::create(input_rows_count, scale); + } + else + result_col = ResultDataType::ColumnType::create(input_rows_count); + + auto date_time_col = arguments[0].column->convertToFullIfNeeded(); + const auto & date_time_col_data = typeid_cast(*date_time_col).getData(); + + auto value_col = castColumn(arguments[1], std::make_shared()); + value_col = value_col->convertToFullIfNeeded(); + const auto & value_col_data = typeid_cast(*value_col).getData(); + + auto & result_col_data = result_col->getData(); + + if constexpr (std::is_same_v) + { + const auto scale = typeid_cast(*result_type).getScale(); + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + + Int64 deg = 1; + for (size_t j = 0; j < scale; ++j) + deg *= 10; + + for (size_t i = 0; i < input_rows_count; ++i) + { + Int64 time = date_lut.toNumYYYYMMDDhhmmss(date_time_col_data[i] / deg); + Int64 fraction = date_time_col_data[i] % deg; + + result_col_data[i] = getChangedDate(time, value_col_data[i], result_type, date_lut, scale, fraction); + } + } + else if constexpr (std::is_same_v && std::is_same_v) + { + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + for (size_t i = 0; i < input_rows_count; ++i) + { + Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000; + result_col_data[i] = getChangedDate(time, value_col_data[i], result_type, date_lut, 3, 0); + } + } + else if constexpr (std::is_same_v && std::is_same_v) + { + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + for (size_t i = 0; i < input_rows_count; ++i) + { + Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000; + result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); + } + } + else if constexpr (std::is_same_v) + { + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + for (size_t i = 0; i < input_rows_count; ++i) + { + Int64 time = date_lut.toNumYYYYMMDDhhmmss(date_time_col_data[i]); + result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); + } + } + else + { + const auto & date_lut = DateLUT::instance(); + for (size_t i = 0; i < input_rows_count; ++i) + { + Int64 time; + if (isDate(input_type)) + time = static_cast(date_lut.toNumYYYYMMDD(DayNum(date_time_col_data[i]))) * 1'000'000; + else + time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000; + + if (isDate(result_type)) + result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); + else + result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); + } + } + + return result_col; + } + + Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 scale = 0, Int64 fraction = 0) const + { + auto year = time / 10'000'000'000; + auto month = (time % 10'000'000'000) / 100'000'000; + auto day = (time % 100'000'000) / 1'000'000; + auto hours = (time % 1'000'000) / 10'000; + auto minutes = (time % 10'000) / 100; + auto seconds = time % 100; + + Int64 min_date = 0, max_date = 0; + Int16 min_year, max_year; + if (isDate(result_type)) + { + min_date = date_lut.makeDayNum(1970, 1, 1); + max_date = date_lut.makeDayNum(2149, 6, 6); + min_year = 1970; + max_year = 2149; + } + else if (isDate32(result_type)) + { + min_date = date_lut.makeDayNum(1900, 1, 1); + max_date = date_lut.makeDayNum(2299, 12, 31); + min_year = 1900; + max_year = 2299; + } + else if (isDateTime(result_type)) + { + min_date = 0; + max_date = 0x0FFFFFFFFLL; + min_year = 1970; + max_year = 2106; + } + else + { + min_date = DecimalUtils::decimalFromComponents( + date_lut.makeDateTime(1900, 1, 1, 0, 0, 0), + static_cast(0), + static_cast(scale)); + Int64 deg = 1; + for (Int64 j = 0; j < scale; ++j) + deg *= 10; + max_date = DecimalUtils::decimalFromComponents( + date_lut.makeDateTime(2299, 12, 31, 23, 59, 59), + static_cast(deg - 1), + static_cast(scale)); + min_year = 1900; + max_year = 2299; + } + + switch (Traits::component) + { + case Component::Year: + if (new_value < min_year) + return min_date; + else if (new_value > max_year) + return max_date; + year = static_cast(new_value); + break; + case Component::Month: + if (new_value < 1 || new_value > 12) + return min_date; + month = static_cast(new_value); + break; + case Component::Day: + if (new_value < 1 || new_value > 31) + return min_date; + day = static_cast(new_value); + break; + case Component::Hour: + if (new_value < 0 || new_value > 23) + return min_date; + hours = static_cast(new_value); + break; + case Component::Minute: + if (new_value < 0 || new_value > 59) + return min_date; + minutes = static_cast(new_value); + break; + case Component::Second: + if (new_value < 0 || new_value > 59) + return min_date; + seconds = static_cast(new_value); + break; + } + + Int64 result; + if (isDate(result_type) || isDate32(result_type)) + result = date_lut.makeDayNum(year, month, day); + else if (isDateTime(result_type)) + result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds); + else +#ifndef __clang_analyzer__ + /// ^^ This looks funny. It is the least terrible suppression of a false positive reported by clang-analyzer (a sub-class + /// of clang-tidy checks) deep down in 'decimalFromComponents'. Usual suppressions of the form NOLINT* don't work here (they + /// would only affect code in _this_ file), and suppressing the issue in 'decimalFromComponents' may suppress true positives. + result = DecimalUtils::decimalFromComponents( + date_lut.makeDateTime(year, month, day, hours, minutes, seconds), + fraction, + static_cast(scale)); +#else + { + UNUSED(fraction); + result = 0; + } +#endif + + if (result < min_date) + return min_date; + + if (result > max_date) + return max_date; + + return result; + } +}; + + +struct ChangeYearTraits +{ + static constexpr auto name = "changeYear"; + static constexpr auto component = Component::Year; +}; + +struct ChangeMonthTraits +{ + static constexpr auto name = "changeMonth"; + static constexpr auto component = Component::Month; +}; + +struct ChangeDayTraits +{ + static constexpr auto name = "changeDay"; + static constexpr auto component = Component::Day; +}; + +struct ChangeHourTraits +{ + static constexpr auto name = "changeHour"; + static constexpr auto component = Component::Hour; +}; + +struct ChangeMinuteTraits +{ + static constexpr auto name = "changeMinute"; + static constexpr auto component = Component::Minute; +}; + +struct ChangeSecondTraits +{ + static constexpr auto name = "changeSecond"; + static constexpr auto component = Component::Second; +}; + +REGISTER_FUNCTION(ChangeDate) +{ + { + FunctionDocumentation::Description description = "Changes the year component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeYear(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the month component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeMonth(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the day component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeDay(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the hour component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeHour(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the minute component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeMinute(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the second component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeSecond(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } +} + +} diff --git a/src/Functions/countMatches.h b/src/Functions/countMatches.h index fbbb9d017ee..5f07b936e26 100644 --- a/src/Functions/countMatches.h +++ b/src/Functions/countMatches.h @@ -38,7 +38,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}, {"pattern", static_cast(&isString), isColumnConst, "constant String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/dateTimeToSnowflakeID.cpp b/src/Functions/dateTimeToSnowflakeID.cpp index 968a7628ca5..c48f8c13152 100644 --- a/src/Functions/dateTimeToSnowflakeID.cpp +++ b/src/Functions/dateTimeToSnowflakeID.cpp @@ -43,7 +43,7 @@ public: FunctionArgumentDescriptors optional_args{ {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); return std::make_shared(); } @@ -91,7 +91,7 @@ public: FunctionArgumentDescriptors optional_args{ {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); return std::make_shared(); } diff --git a/src/Functions/extractAll.cpp b/src/Functions/extractAll.cpp index 5801a7b8f4f..4a3eb32474c 100644 --- a/src/Functions/extractAll.cpp +++ b/src/Functions/extractAll.cpp @@ -59,7 +59,7 @@ public: {"pattern", static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index dfcd0e31715..7732855b211 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -74,7 +74,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); /// Two-dimensional array of strings, each `row` of top array represents matching groups. return std::make_shared(std::make_shared(std::make_shared())); diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp index f62352af0bd..ac6266a2e82 100644 --- a/src/Functions/extractGroups.cpp +++ b/src/Functions/extractGroups.cpp @@ -48,7 +48,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 1c5dc05c5aa..c3a5fe036c3 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -782,9 +783,9 @@ public: static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } explicit FunctionFormatDateTimeImpl(ContextPtr context) - : mysql_M_is_month_name(context->getSettings().formatdatetime_parsedatetime_m_is_month_name) - , mysql_f_prints_single_zero(context->getSettings().formatdatetime_f_prints_single_zero) - , mysql_format_ckl_without_leading_zeros(context->getSettings().formatdatetime_format_without_leading_zeros) + : mysql_M_is_month_name(context->getSettingsRef().formatdatetime_parsedatetime_m_is_month_name) + , mysql_f_prints_single_zero(context->getSettingsRef().formatdatetime_f_prints_single_zero) + , mysql_format_ckl_without_leading_zeros(context->getSettingsRef().formatdatetime_format_without_leading_zeros) { } diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 3b632147864..d10b3f9a5b7 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -54,7 +55,7 @@ public: FunctionArgumentDescriptors args{ {"query", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); DataTypePtr string_type = std::make_shared(); if (error_handling == ErrorHandling::Null) diff --git a/src/Functions/fromDaysSinceYearZero.cpp b/src/Functions/fromDaysSinceYearZero.cpp index b98c587d172..0543e6bf229 100644 --- a/src/Functions/fromDaysSinceYearZero.cpp +++ b/src/Functions/fromDaysSinceYearZero.cpp @@ -54,7 +54,7 @@ public: { FunctionArgumentDescriptors args{{"days", static_cast(&isNativeInteger), nullptr, "Integer"}}; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 8ac010deafc..a171b6bf86e 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -167,7 +167,7 @@ public: FunctionArgumentDescriptors optional_args{ {"expr", nullptr, nullptr, "Arbitrary expression"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index b0fec43fe94..a928f9009c8 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -30,9 +30,9 @@ public: { FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ - {"expr", nullptr, nullptr, "Arbitrary Expression"} + {"expr", nullptr, nullptr, "any type"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp index b226c0840f4..5dc6f1cde32 100644 --- a/src/Functions/generateUUIDv7.cpp +++ b/src/Functions/generateUUIDv7.cpp @@ -11,20 +11,6 @@ namespace /* Bit layouts of UUIDv7 -without counter: - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | ver | rand_a | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -|var| rand_b | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| rand_b | -└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ - -with counter: 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ @@ -163,7 +149,7 @@ public: FunctionArgumentDescriptors optional_args{ {"expr", nullptr, nullptr, "Arbitrary expression"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/geohashDecode.cpp b/src/Functions/geohashDecode.cpp index b2454f5dffc..96ad7dacfc4 100644 --- a/src/Functions/geohashDecode.cpp +++ b/src/Functions/geohashDecode.cpp @@ -38,9 +38,12 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isStringOrFixedString, "string or fixed string"); + FunctionArgumentDescriptors args{ + {"encoded", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} + }; + validateFunctionArguments(*this, arguments, args); return std::make_shared( DataTypes{std::make_shared(), std::make_shared()}, diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index 7c353b822aa..034c8188b63 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } namespace @@ -40,19 +39,16 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isFloat, "float"); - validateArgumentType(*this, arguments, 1, isFloat, "float"); - if (arguments.size() == 3) - { - validateArgumentType(*this, arguments, 2, isInteger, "integer"); - } - if (arguments.size() > 3) - { - throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Too many arguments for function {} expected at most 3", - getName()); - } + FunctionArgumentDescriptors mandatory_args{ + {"longitude", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude", static_cast(&isFloat), nullptr, "Float*"} + }; + FunctionArgumentDescriptors optional_args{ + {"precision", static_cast(&isInteger), nullptr, "(U)Int*"} + }; + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/geohashesInBox.cpp b/src/Functions/geohashesInBox.cpp index ac8d4a6ad8f..9429903dda7 100644 --- a/src/Functions/geohashesInBox.cpp +++ b/src/Functions/geohashesInBox.cpp @@ -35,22 +35,25 @@ public: size_t getNumberOfArguments() const override { return 5; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isFloat, "float"); - validateArgumentType(*this, arguments, 1, isFloat, "float"); - validateArgumentType(*this, arguments, 2, isFloat, "float"); - validateArgumentType(*this, arguments, 3, isFloat, "float"); - validateArgumentType(*this, arguments, 4, isUInt8, "integer"); + FunctionArgumentDescriptors args{ + {"longitute_min", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude_min", static_cast(&isFloat), nullptr, "Float*"}, + {"longitute_max", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude_max", static_cast(&isFloat), nullptr, "Float*"}, + {"precision", static_cast(&isUInt8), nullptr, "UInt8"} + }; + validateFunctionArguments(*this, arguments, args); - if (!(arguments[0]->equals(*arguments[1]) && - arguments[0]->equals(*arguments[2]) && - arguments[0]->equals(*arguments[3]))) + if (!(arguments[0].type->equals(*arguments[1].type) && + arguments[0].type->equals(*arguments[2].type) && + arguments[0].type->equals(*arguments[3].type))) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of argument of {} all coordinate arguments must have the same type, " - "instead they are:{}, {}, {}, {}.", getName(), arguments[0]->getName(), - arguments[1]->getName(), arguments[2]->getName(), arguments[3]->getName()); + "instead they are:{}, {}, {}, {}.", getName(), arguments[0].type->getName(), + arguments[1].type->getName(), arguments[2].type->getName(), arguments[3].type->getName()); } return std::make_shared(std::make_shared()); diff --git a/src/Functions/getClientHTTPHeader.cpp b/src/Functions/getClientHTTPHeader.cpp index ebd070a90b9..140f39d03b8 100644 --- a/src/Functions/getClientHTTPHeader.cpp +++ b/src/Functions/getClientHTTPHeader.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/getSetting.cpp b/src/Functions/getSetting.cpp index d6b8946e760..aed6b2119e4 100644 --- a/src/Functions/getSetting.cpp +++ b/src/Functions/getSetting.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/hasColumnInTable.cpp b/src/Functions/hasColumnInTable.cpp index 8ea16f688ee..00714997b4a 100644 --- a/src/Functions/hasColumnInTable.cpp +++ b/src/Functions/hasColumnInTable.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 4c08cd3b931..30eaa26fa20 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/keyvaluepair/extractKeyValuePairs.cpp b/src/Functions/keyvaluepair/extractKeyValuePairs.cpp index 94f02861af0..02e8412bbf3 100644 --- a/src/Functions/keyvaluepair/extractKeyValuePairs.cpp +++ b/src/Functions/keyvaluepair/extractKeyValuePairs.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/src/Functions/lemmatize.cpp b/src/Functions/lemmatize.cpp index e1168e077bb..76e5ed7432e 100644 --- a/src/Functions/lemmatize.cpp +++ b/src/Functions/lemmatize.cpp @@ -3,6 +3,7 @@ #if USE_NLP #include +#include #include #include #include diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index 3d8b8617472..41a09793994 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -87,7 +87,7 @@ public: {mandatory_argument_names_year_month_day[1], static_cast(&isNumber), nullptr, "Number"}, {mandatory_argument_names_year_month_day[2], static_cast(&isNumber), nullptr, "Number"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); } else { @@ -95,7 +95,7 @@ public: {mandatory_argument_names_year_dayofyear[0], static_cast(&isNumber), nullptr, "Number"}, {mandatory_argument_names_year_dayofyear[1], static_cast(&isNumber), nullptr, "Number"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); } return std::make_shared(); @@ -193,7 +193,7 @@ public: {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -357,7 +357,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional timezone argument std::string timezone; @@ -440,7 +440,7 @@ public: {optional_argument_names[2], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); if (arguments.size() >= mandatory_argument_names.size() + 1) { @@ -572,7 +572,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional timezone argument std::string timezone; @@ -652,7 +652,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional precision argument auto precision = DEFAULT_PRECISION; diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 66cd10a3f0b..0d42ce2abe1 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index d3bf5618f66..c4b675fcf6c 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/neighbor.cpp b/src/Functions/neighbor.cpp index 62f129109f9..49b8980e7e2 100644 --- a/src/Functions/neighbor.cpp +++ b/src/Functions/neighbor.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Functions/now.cpp b/src/Functions/now.cpp index 827b800a243..352c05f8f32 100644 --- a/src/Functions/now.cpp +++ b/src/Functions/now.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -90,7 +91,7 @@ public: size_t getNumberOfArguments() const override { return 0; } static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique(context); } explicit NowOverloadResolver(ContextPtr context) - : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) {} DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override diff --git a/src/Functions/now64.cpp b/src/Functions/now64.cpp index d6f8474c984..23ab8cad003 100644 --- a/src/Functions/now64.cpp +++ b/src/Functions/now64.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -118,7 +119,7 @@ public: size_t getNumberOfArguments() const override { return 0; } static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique(context); } explicit Now64OverloadResolver(ContextPtr context) - : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) {} DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override diff --git a/src/Functions/nowInBlock.cpp b/src/Functions/nowInBlock.cpp index 74f420986c8..291e43bdce5 100644 --- a/src/Functions/nowInBlock.cpp +++ b/src/Functions/nowInBlock.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -31,7 +32,7 @@ public: return std::make_shared(context); } explicit FunctionNowInBlock(ContextPtr context) - : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) {} String getName() const override diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 162b8c58873..d2353c19a61 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -564,8 +565,8 @@ namespace static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } explicit FunctionParseDateTimeImpl(ContextPtr context) - : mysql_M_is_month_name(context->getSettings().formatdatetime_parsedatetime_m_is_month_name) - , mysql_parse_ckl_without_leading_zeros(context->getSettings().parsedatetime_parse_without_leading_zeros) + : mysql_M_is_month_name(context->getSettingsRef().formatdatetime_parsedatetime_m_is_month_name) + , mysql_parse_ckl_without_leading_zeros(context->getSettingsRef().parsedatetime_parse_without_leading_zeros) { } @@ -589,7 +590,7 @@ namespace {"timezone", static_cast(&isString), &isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String time_zone_name = getTimeZone(arguments).getTimeZone(); DataTypePtr date_type = std::make_shared(time_zone_name); diff --git a/src/Functions/parseReadableSize.cpp b/src/Functions/parseReadableSize.cpp index f5c2c53439b..1abcf7f164f 100644 --- a/src/Functions/parseReadableSize.cpp +++ b/src/Functions/parseReadableSize.cpp @@ -68,7 +68,7 @@ public: { {"readable_size", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); DataTypePtr return_type = std::make_shared(); if constexpr (error_handling == ErrorHandling::Null) return std::make_shared(return_type); diff --git a/src/Functions/partitionId.cpp b/src/Functions/partitionId.cpp index e2e9038cd8b..dab56f39adc 100644 --- a/src/Functions/partitionId.cpp +++ b/src/Functions/partitionId.cpp @@ -66,6 +66,7 @@ public: REGISTER_FUNCTION(PartitionId) { factory.registerFunction(); + factory.registerAlias("partitionID", "partitionId"); } } diff --git a/src/Functions/pointInPolygon.cpp b/src/Functions/pointInPolygon.cpp index 6b413829bd1..32d5905a513 100644 --- a/src/Functions/pointInPolygon.cpp +++ b/src/Functions/pointInPolygon.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/regexpExtract.cpp b/src/Functions/regexpExtract.cpp index cfb42580cb0..3cc5393296c 100644 --- a/src/Functions/regexpExtract.cpp +++ b/src/Functions/regexpExtract.cpp @@ -54,7 +54,7 @@ public: if (arguments.size() == 3) args.emplace_back(FunctionArgumentDescriptor{"index", static_cast(&isInteger), nullptr, "Integer"}); - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index 7f2fe646062..aa90bf2490d 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -201,7 +201,7 @@ public: {"n", static_cast(&isInteger), nullptr, "Integer"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/runningAccumulate.cpp b/src/Functions/runningAccumulate.cpp index d585affd91b..c94cbe67216 100644 --- a/src/Functions/runningAccumulate.cpp +++ b/src/Functions/runningAccumulate.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Functions/runningDifference.h b/src/Functions/runningDifference.h index fe477d13744..d51ea805f3c 100644 --- a/src/Functions/runningDifference.h +++ b/src/Functions/runningDifference.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include #include diff --git a/src/Functions/seriesDecomposeSTL.cpp b/src/Functions/seriesDecomposeSTL.cpp index 618808b64ed..720aa1e0799 100644 --- a/src/Functions/seriesDecomposeSTL.cpp +++ b/src/Functions/seriesDecomposeSTL.cpp @@ -45,7 +45,7 @@ public: {"time_series", static_cast(&isArray), nullptr, "Array"}, {"period", static_cast(&isNativeUInt), nullptr, "Unsigned Integer"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared(std::make_shared())); } diff --git a/src/Functions/seriesOutliersDetectTukey.cpp b/src/Functions/seriesOutliersDetectTukey.cpp index 81fc904e16e..4063d0ab85b 100644 --- a/src/Functions/seriesOutliersDetectTukey.cpp +++ b/src/Functions/seriesOutliersDetectTukey.cpp @@ -51,7 +51,7 @@ public: {"max_percentile", static_cast(&isFloat), isColumnConst, "Number"}, {"k", static_cast(&isNativeNumber), isColumnConst, "Number"}}; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/seriesPeriodDetectFFT.cpp b/src/Functions/seriesPeriodDetectFFT.cpp index e85b3a97c67..471354235d5 100644 --- a/src/Functions/seriesPeriodDetectFFT.cpp +++ b/src/Functions/seriesPeriodDetectFFT.cpp @@ -53,7 +53,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{{"time_series", static_cast(&isArray), nullptr, "Array"}}; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index e7e423058f1..b74edfc3121 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index 22748e86888..62ee19fa904 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp index 5ff8a636058..0d4d65ee4b0 100644 --- a/src/Functions/snowflake.cpp +++ b/src/Functions/snowflake.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -64,7 +65,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime), nullptr, "DateTime"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -121,7 +122,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String timezone; if (arguments.size() == 2) @@ -190,7 +191,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -255,7 +256,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String timezone; if (arguments.size() == 2) diff --git a/src/Functions/snowflakeIDToDateTime.cpp b/src/Functions/snowflakeIDToDateTime.cpp index b799792a56f..6c57c72fff1 100644 --- a/src/Functions/snowflakeIDToDateTime.cpp +++ b/src/Functions/snowflakeIDToDateTime.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -38,7 +39,7 @@ public: static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } explicit FunctionSnowflakeIDToDateTime(ContextPtr context) - : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) {} String getName() const override { return name; } @@ -56,7 +57,7 @@ public: {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); String timezone; if (arguments.size() == 3) @@ -109,7 +110,7 @@ public: static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } explicit FunctionSnowflakeIDToDateTime64(ContextPtr context) - : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) {} String getName() const override { return name; } @@ -127,7 +128,7 @@ public: {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); String timezone; if (arguments.size() == 3) diff --git a/src/Functions/space.cpp b/src/Functions/space.cpp index 83183c991bc..ce12f2f541c 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -48,7 +48,7 @@ public: {"n", static_cast(&isInteger), nullptr, "Integer"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp index 6679646fef4..0e133590b84 100644 --- a/src/Functions/sqid.cpp +++ b/src/Functions/sqid.cpp @@ -100,7 +100,7 @@ public: FunctionArgumentDescriptors args{ {"sqid", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/stem.cpp b/src/Functions/stem.cpp index d326d17a7a8..5b845cf332b 100644 --- a/src/Functions/stem.cpp +++ b/src/Functions/stem.cpp @@ -3,6 +3,7 @@ #if USE_NLP #include +#include #include #include #include diff --git a/src/Functions/synonyms.cpp b/src/Functions/synonyms.cpp index 6b3e205785c..28dd83627d9 100644 --- a/src/Functions/synonyms.cpp +++ b/src/Functions/synonyms.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/throwIf.cpp b/src/Functions/throwIf.cpp index c6ff5b9151f..becc6d2f772 100644 --- a/src/Functions/throwIf.cpp +++ b/src/Functions/throwIf.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/timestamp.cpp b/src/Functions/timestamp.cpp index fbca08b0968..6f2bd2030d5 100644 --- a/src/Functions/timestamp.cpp +++ b/src/Functions/timestamp.cpp @@ -46,7 +46,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(DATETIME_SCALE); } diff --git a/src/Functions/toDecimalString.cpp b/src/Functions/toDecimalString.cpp index fc621b272de..4ee664ad237 100644 --- a/src/Functions/toDecimalString.cpp +++ b/src/Functions/toDecimalString.cpp @@ -43,7 +43,7 @@ public: {"precision", static_cast(&isNativeInteger), &isColumnConst, "const Integer"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, {}); + validateFunctionArguments(*this, arguments, mandatory_args, {}); return std::make_shared(); } diff --git a/src/Functions/toTimezone.cpp b/src/Functions/toTimezone.cpp index a0d90351898..cbcdf9bc772 100644 --- a/src/Functions/toTimezone.cpp +++ b/src/Functions/toTimezone.cpp @@ -9,6 +9,7 @@ #include #include +#include namespace DB { @@ -88,7 +89,7 @@ public: size_t getNumberOfArguments() const override { return 2; } static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique(context); } explicit ToTimeZoneOverloadResolver(ContextPtr context) - : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) {} DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override diff --git a/src/Functions/toTypeName.cpp b/src/Functions/toTypeName.cpp index 63bcc58d65e..abe02148d4a 100644 --- a/src/Functions/toTypeName.cpp +++ b/src/Functions/toTypeName.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include diff --git a/src/Functions/tuple.cpp b/src/Functions/tuple.cpp index c83195bc976..84772f79d08 100644 --- a/src/Functions/tuple.cpp +++ b/src/Functions/tuple.cpp @@ -1,11 +1,28 @@ #include +#include + namespace DB { +FunctionPtr FunctionTuple::create(DB::ContextPtr context) +{ + return std::make_shared(context->getSettingsRef().enable_named_columns_in_function_tuple); +} + REGISTER_FUNCTION(Tuple) { - factory.registerFunction(); + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Returns a tuple by grouping input arguments. + +For columns C1, C2, ... with the types T1, T2, ..., it returns a named Tuple(C1 T1, C2 T2, ...) type tuple containing these columns if their names are unique and can be treated as unquoted identifiers, otherwise a Tuple(T1, T2, ...) is returned. There is no cost to execute the function. +Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table. + +The function implements the operator `(x, y, ...)`. +)", + .examples{{"typical", "SELECT tuple(1, 2)", "(1,2)"}}, + .categories{"Miscellaneous"}}); } } diff --git a/src/Functions/tuple.h b/src/Functions/tuple.h index 8b3e041f781..03ba7ec9ef2 100644 --- a/src/Functions/tuple.h +++ b/src/Functions/tuple.h @@ -6,20 +6,24 @@ #include #include #include +#include namespace DB { -/** tuple(x, y, ...) is a function that allows you to group several columns +/** tuple(x, y, ...) is a function that allows you to group several columns. * tupleElement(tuple, n) is a function that allows you to retrieve a column from tuple. */ class FunctionTuple : public IFunction { + bool enable_named_columns; + public: static constexpr auto name = "tuple"; - /// maybe_unused: false-positive - [[ maybe_unused ]] static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr context); + + explicit FunctionTuple(bool enable_named_columns_ = false) : enable_named_columns(enable_named_columns_) { } String getName() const override { return name; } @@ -38,9 +42,26 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - return std::make_shared(arguments); + if (arguments.empty()) + return std::make_shared(DataTypes{}); + + DataTypes types; + Names names; + NameSet name_set; + for (const auto & argument : arguments) + { + types.emplace_back(argument.type); + names.emplace_back(argument.name); + name_set.emplace(argument.name); + } + + if (enable_named_columns && name_set.size() == names.size() + && std::all_of(names.cbegin(), names.cend(), [](const auto & n) { return isUnquotedIdentifier(n); })) + return std::make_shared(types, names); + else + return std::make_shared(types); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override @@ -53,9 +74,9 @@ public: for (size_t i = 0; i < tuple_size; ++i) { /** If tuple is mixed of constant and not constant columns, - * convert all to non-constant columns, - * because many places in code expect all non-constant columns in non-constant tuple. - */ + * convert all to non-constant columns, + * because many places in code expect all non-constant columns in non-constant tuple. + */ tuple_columns[i] = arguments[i].column->convertToFullColumnIfConst(); } return ColumnTuple::create(tuple_columns); diff --git a/src/Functions/tupleNames.cpp b/src/Functions/tupleNames.cpp new file mode 100644 index 00000000000..e444478c224 --- /dev/null +++ b/src/Functions/tupleNames.cpp @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +/** Transform a named tuple into names, which is a constant array of strings. + */ +class ExecutableFunctionTupleNames : public IExecutableFunction +{ +public: + static constexpr auto name = "tupleNames"; + + explicit ExecutableFunctionTupleNames(Array name_fields_) : name_fields(std::move(name_fields_)) { } + + String getName() const override { return name; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override + { + return result_type->createColumnConst(input_rows_count, name_fields); + } + +private: + Array name_fields; +}; + +class FunctionBaseTupleNames : public IFunctionBase +{ +public: + static constexpr auto name = "tupleNames"; + + explicit FunctionBaseTupleNames(DataTypePtr argument_type, DataTypePtr result_type_, Array name_fields_) + : argument_types({std::move(argument_type)}), result_type(std::move(result_type_)), name_fields(std::move(name_fields_)) + { + } + + String getName() const override { return name; } + + bool isSuitableForConstantFolding() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + const DataTypes & getArgumentTypes() const override { return argument_types; } + + const DataTypePtr & getResultType() const override { return result_type; } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override + { + return std::make_unique(name_fields); + } + +private: + DataTypes argument_types; + DataTypePtr result_type; + Array name_fields; +}; + +class TupleNamesOverloadResolver : public IFunctionOverloadResolver +{ +public: + static constexpr auto name = "tupleNames"; + + static FunctionOverloadResolverPtr create(ContextPtr) { return std::make_unique(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + const DataTypeTuple * tuple = checkAndGetDataType(arguments[0].type.get()); + + if (!tuple) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a tuple", getName()); + + return std::make_shared(std::make_shared()); + } + + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override + { + const DataTypeTuple * tuple = checkAndGetDataType(arguments[0].type.get()); + + if (!tuple) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a tuple", getName()); + + DataTypes types = tuple->getElements(); + Array name_fields; + for (const auto & elem_name : tuple->getElementNames()) + name_fields.emplace_back(elem_name); + + return std::make_unique(arguments[0].type, result_type, std::move(name_fields)); + } +}; + +} + +REGISTER_FUNCTION(TupleNames) +{ + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Converts a tuple into an array of column names. For a tuple in the form `Tuple(a T, b T, ...)`, it returns an array of strings representing the named columns of the tuple. If the tuple elements do not have explicit names, their indices will be used as the column names instead. +)", + .examples{{"typical", "SELECT tupleNames(tuple(1 as a, 2 as b))", "['a','b']"}}, + .categories{"Miscellaneous"}}); +} + +} diff --git a/src/Functions/visibleWidth.cpp b/src/Functions/visibleWidth.cpp index 9a3edd9fbec..ebd4a1ff713 100644 --- a/src/Functions/visibleWidth.cpp +++ b/src/Functions/visibleWidth.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include diff --git a/src/Functions/widthBucket.cpp b/src/Functions/widthBucket.cpp index 62ed460ca9d..e804808b699 100644 --- a/src/Functions/widthBucket.cpp +++ b/src/Functions/widthBucket.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp b/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp index 2a979f500f7..407977f1f13 100644 --- a/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp +++ b/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp @@ -1,5 +1,7 @@ #include +#include +#include namespace DB { diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 8bd436f218c..c10a7cd017a 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -16,10 +16,12 @@ namespace ProfileEvents { extern const Event AzureCopyObject; - extern const Event AzureUploadPart; + extern const Event AzureStageBlock; + extern const Event AzureCommitBlockList; extern const Event DiskAzureCopyObject; - extern const Event DiskAzureUploadPart; + extern const Event DiskAzureStageBlock; + extern const Event DiskAzureCommitBlockList; } @@ -45,9 +47,9 @@ namespace size_t total_size_, const String & dest_container_for_logging_, const String & dest_blob_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_, - const Poco::Logger * log_) + LoggerPtr log_) : create_read_buffer(create_read_buffer_) , client(client_) , offset (offset_) @@ -70,9 +72,9 @@ namespace size_t total_size; const String & dest_container_for_logging; const String & dest_blob; - std::shared_ptr settings; + std::shared_ptr settings; ThreadPoolCallbackRunnerUnsafe schedule; - const Poco::Logger * log; + const LoggerPtr log; size_t max_single_part_upload_size; struct UploadPartTask @@ -81,7 +83,6 @@ namespace size_t part_size; std::vector block_ids; bool is_finished = false; - std::exception_ptr exception; }; size_t normal_part_size; @@ -90,6 +91,7 @@ namespace std::list TSA_GUARDED_BY(bg_tasks_mutex) bg_tasks; int num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; int num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; + std::exception_ptr bg_exception TSA_GUARDED_BY(bg_tasks_mutex); std::mutex bg_tasks_mutex; std::condition_variable bg_tasks_condvar; @@ -156,6 +158,10 @@ namespace void completeMultipartUpload() { auto block_blob_client = client->GetBlockBlobClient(dest_blob); + ProfileEvents::increment(ProfileEvents::AzureCommitBlockList); + if (client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureCommitBlockList); + block_blob_client.CommitBlockList(block_ids); } @@ -180,7 +186,7 @@ namespace } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, fmt::format("While performing multipart upload of blob {} in container {}", dest_blob, dest_container_for_logging)); waitForAllBackgroundTasks(); throw; } @@ -236,7 +242,12 @@ namespace } catch (...) { - task->exception = std::current_exception(); + std::lock_guard lock(bg_tasks_mutex); + if (!bg_exception) + { + tryLogCurrentException(log, "While writing part"); + bg_exception = std::current_exception(); /// The exception will be rethrown after all background tasks stop working. + } } task_finish_notify(); }, Priority{}); @@ -259,9 +270,9 @@ namespace void processUploadPartRequest(UploadPartTask & task) { - ProfileEvents::increment(ProfileEvents::AzureUploadPart); + ProfileEvents::increment(ProfileEvents::AzureStageBlock); if (client->GetClickhouseOptions().IsClientForDisk) - ProfileEvents::increment(ProfileEvents::DiskAzureUploadPart); + ProfileEvents::increment(ProfileEvents::DiskAzureStageBlock); auto block_blob_client = client->GetBlockBlobClient(dest_blob); auto read_buffer = std::make_unique(create_read_buffer(), task.part_offset, task.part_size); @@ -293,13 +304,13 @@ namespace /// Suppress warnings because bg_tasks_mutex is actually hold, but tsa annotations do not understand std::unique_lock bg_tasks_condvar.wait(lock, [this]() {return TSA_SUPPRESS_WARNING_FOR_READ(num_added_bg_tasks) == TSA_SUPPRESS_WARNING_FOR_READ(num_finished_bg_tasks); }); - auto & tasks = TSA_SUPPRESS_WARNING_FOR_WRITE(bg_tasks); - for (auto & task : tasks) - { - if (task.exception) - std::rethrow_exception(task.exception); + auto exception = TSA_SUPPRESS_WARNING_FOR_READ(bg_exception); + if (exception) + std::rethrow_exception(exception); + + const auto & tasks = TSA_SUPPRESS_WARNING_FOR_READ(bg_tasks); + for (const auto & task : tasks) block_ids.insert(block_ids.end(),task.block_ids.begin(), task.block_ids.end()); - } } }; } @@ -312,10 +323,11 @@ void copyDataToAzureBlobStorageFile( std::shared_ptr dest_client, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, ThreadPoolCallbackRunnerUnsafe schedule) { - UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; + auto log = getLogger("copyDataToAzureBlobStorageFile"); + UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, log}; helper.performCopy(); } @@ -329,14 +341,15 @@ void copyAzureBlobStorageFile( size_t size, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule) { + auto log = getLogger("copyAzureBlobStorageFile"); if (settings->use_native_copy) { - LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); + LOG_TRACE(log, "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); ProfileEvents::increment(ProfileEvents::AzureCopyObject); if (dest_client->GetClickhouseOptions().IsClientForDisk) ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject); @@ -347,7 +360,7 @@ void copyAzureBlobStorageFile( if (size < settings->max_single_part_copy_size) { - LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copy blob sync {} -> {}", src_blob, dest_blob); + LOG_TRACE(log, "Copy blob sync {} -> {}", src_blob, dest_blob); block_blob_client_dest.CopyFromUri(source_uri); } else @@ -363,7 +376,7 @@ void copyAzureBlobStorageFile( if (copy_status.HasValue() && copy_status.Value() == Azure::Storage::Blobs::Models::CopyStatus::Success) { - LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copy of {} to {} finished", properties_model.CopySource.Value(), dest_blob); + LOG_TRACE(log, "Copy of {} to {} finished", properties_model.CopySource.Value(), dest_blob); } else { @@ -377,14 +390,14 @@ void copyAzureBlobStorageFile( } else { - LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Container: {}, Blob: {}", src_container_for_logging, src_blob); + LOG_TRACE(log, "Reading from Container: {}, Blob: {}", src_container_for_logging, src_blob); auto create_read_buffer = [&] { return std::make_unique( src_client, src_blob, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); }; - UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyAzureBlobStorageFile")}; + UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, log}; helper.performCopy(); } } diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 9c20ee4cff0..c8e48fcd372 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -28,7 +28,7 @@ void copyAzureBlobStorageFile( size_t src_size, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); @@ -45,7 +45,7 @@ void copyDataToAzureBlobStorageFile( std::shared_ptr client, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); } diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 5c9a69893df..da38bccdea1 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -4,12 +4,15 @@ #include #include +#include #include #include #include +#include "config.h" + namespace ProfileEvents { @@ -41,10 +44,13 @@ struct Memory : boost::noncopyable, Allocator char * m_data = nullptr; size_t alignment = 0; + [[maybe_unused]] bool allow_gwp_asan_force_sample{false}; + Memory() = default; /// If alignment != 0, then allocate memory aligned to specified value. - explicit Memory(size_t size_, size_t alignment_ = 0) : alignment(alignment_) + explicit Memory(size_t size_, size_t alignment_ = 0, bool allow_gwp_asan_force_sample_ = false) + : alignment(alignment_), allow_gwp_asan_force_sample(allow_gwp_asan_force_sample_) { alloc(size_); } @@ -127,6 +133,11 @@ private: ProfileEvents::increment(ProfileEvents::IOBufferAllocs); ProfileEvents::increment(ProfileEvents::IOBufferAllocBytes, new_capacity); +#if USE_GWP_ASAN + if (unlikely(allow_gwp_asan_force_sample && GWPAsan::shouldForceSample())) + gwp_asan::getThreadLocals()->NextSampleCounter = 1; +#endif + m_data = static_cast(Allocator::alloc(new_capacity, alignment)); m_capacity = new_capacity; m_size = new_size; @@ -154,7 +165,7 @@ protected: public: /// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership. explicit BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) - : Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment) + : Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment, /*allow_gwp_asan_force_sample_=*/true) { Base::set(existing_memory ? existing_memory : memory.data(), size); Base::padded = !existing_memory; diff --git a/src/IO/ConnectionTimeouts.cpp b/src/IO/ConnectionTimeouts.cpp index da6214ae477..662a2c067cf 100644 --- a/src/IO/ConnectionTimeouts.cpp +++ b/src/IO/ConnectionTimeouts.cpp @@ -1,7 +1,10 @@ +#include +#include #include -#include #include +#include + namespace DB { diff --git a/src/IO/ConnectionTimeouts.h b/src/IO/ConnectionTimeouts.h index b86ec44d21c..b84db46d656 100644 --- a/src/IO/ConnectionTimeouts.h +++ b/src/IO/ConnectionTimeouts.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include @@ -10,6 +9,7 @@ namespace DB { +struct ServerSettings; struct Settings; #define APPLY_FOR_ALL_CONNECTION_TIMEOUT_MEMBERS(M) \ diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 303ffb744b5..b753e66da48 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -713,8 +713,12 @@ ReadWriteBufferFromHTTP::HTTPFileInfo ReadWriteBufferFromHTTP::getFileInfo() /// fall back to slow whole-file reads when HEAD is actually supported; that sounds /// like a nightmare to debug.) if (e.getHTTPStatus() >= 400 && e.getHTTPStatus() <= 499 && - e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_TOO_MANY_REQUESTS) + e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_TOO_MANY_REQUESTS && + e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_REQUEST_TIMEOUT && + e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_MISDIRECTED_REQUEST) + { return HTTPFileInfo{}; + } throw; } diff --git a/src/IO/S3/BlobStorageLogWriter.cpp b/src/IO/S3/BlobStorageLogWriter.cpp index c2f0cb86928..d3b97771790 100644 --- a/src/IO/S3/BlobStorageLogWriter.cpp +++ b/src/IO/S3/BlobStorageLogWriter.cpp @@ -56,7 +56,6 @@ void BlobStorageLogWriter::addEvent( BlobStorageLogWriterPtr BlobStorageLogWriter::create(const String & disk_name) { -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// Keeper standalone build doesn't have a context if (auto blob_storage_log = Context::getGlobalContextInstance()->getBlobStorageLog()) { auto log_writer = std::make_shared(std::move(blob_storage_log)); @@ -67,7 +66,6 @@ BlobStorageLogWriterPtr BlobStorageLogWriter::create(const String & disk_name) return log_writer; } -#endif return {}; } diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 55441cfb86b..3b958dea046 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -23,10 +23,11 @@ #include #include - #include #include +#include + #include diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index bb654c3f5c9..361b5a707fd 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -98,7 +98,6 @@ namespace size_t part_size; String tag; bool is_finished = false; - std::exception_ptr exception; }; size_t num_parts; @@ -111,6 +110,7 @@ namespace size_t num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; size_t num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; size_t num_finished_parts TSA_GUARDED_BY(bg_tasks_mutex) = 0; + std::exception_ptr bg_exception TSA_GUARDED_BY(bg_tasks_mutex); std::mutex bg_tasks_mutex; std::condition_variable bg_tasks_condvar; @@ -273,7 +273,7 @@ namespace } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, fmt::format("While performing multipart upload of {}", dest_key)); // Multipart upload failed because it wasn't possible to schedule all the tasks. // To avoid execution of already scheduled tasks we abort MultipartUpload. abortMultipartUpload(); @@ -290,9 +290,9 @@ namespace if (!total_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chosen multipart upload for an empty file. This must not happen"); - auto max_part_number = request_settings.max_part_number; - auto min_upload_part_size = request_settings.min_upload_part_size; - auto max_upload_part_size = request_settings.max_upload_part_size; + UInt64 max_part_number = request_settings.max_part_number; + UInt64 min_upload_part_size = request_settings.min_upload_part_size; + UInt64 max_upload_part_size = request_settings.max_upload_part_size; if (!max_part_number) throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_part_number must not be 0"); @@ -385,7 +385,12 @@ namespace } catch (...) { - task->exception = std::current_exception(); + std::lock_guard lock(bg_tasks_mutex); + if (!bg_exception) + { + tryLogCurrentException(log, fmt::format("While writing part #{}", task->part_number)); + bg_exception = std::current_exception(); /// The exception will be rethrown after all background tasks stop working. + } } task_finish_notify(); }, Priority{}); @@ -435,22 +440,21 @@ namespace /// Suppress warnings because bg_tasks_mutex is actually hold, but tsa annotations do not understand std::unique_lock bg_tasks_condvar.wait(lock, [this]() {return TSA_SUPPRESS_WARNING_FOR_READ(num_added_bg_tasks) == TSA_SUPPRESS_WARNING_FOR_READ(num_finished_bg_tasks); }); - auto & tasks = TSA_SUPPRESS_WARNING_FOR_WRITE(bg_tasks); - for (auto & task : tasks) + auto exception = TSA_SUPPRESS_WARNING_FOR_READ(bg_exception); + if (exception) { - if (task.exception) - { - /// abortMultipartUpload() might be called already, see processUploadPartRequest(). - /// However if there were concurrent uploads at that time, those part uploads might or might not succeed. - /// As a result, it might be necessary to abort a given multipart upload multiple times in order to completely free - /// all storage consumed by all parts. - abortMultipartUpload(); + /// abortMultipartUpload() might be called already, see processUploadPartRequest(). + /// However if there were concurrent uploads at that time, those part uploads might or might not succeed. + /// As a result, it might be necessary to abort a given multipart upload multiple times in order to completely free + /// all storage consumed by all parts. + abortMultipartUpload(); - std::rethrow_exception(task.exception); - } - - part_tags.push_back(task.tag); + std::rethrow_exception(exception); } + + const auto & tasks = TSA_SUPPRESS_WARNING_FOR_READ(bg_tasks); + for (const auto & task : tasks) + part_tags.push_back(task.tag); } }; diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 490bf8c2d0c..a794cdbcf05 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "config.h" diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 2dca08871d3..fcfd0cfffae 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -30,6 +30,8 @@ namespace ErrorCodes extern const int S3_ERROR; } +struct Settings; + class S3Exception : public Exception { public: diff --git a/src/IO/S3Settings.cpp b/src/IO/S3Settings.cpp index a5a50c873cb..f99e8c0105b 100644 --- a/src/IO/S3Settings.cpp +++ b/src/IO/S3Settings.cpp @@ -1,9 +1,11 @@ #include +#include #include -#include #include +#include + namespace DB { diff --git a/src/IO/examples/CMakeLists.txt b/src/IO/examples/CMakeLists.txt index 12b85c483a1..fc9d9c7dcd1 100644 --- a/src/IO/examples/CMakeLists.txt +++ b/src/IO/examples/CMakeLists.txt @@ -59,10 +59,10 @@ clickhouse_add_executable (parse_date_time_best_effort parse_date_time_best_effo target_link_libraries (parse_date_time_best_effort PRIVATE clickhouse_common_io) clickhouse_add_executable (zlib_ng_bug zlib_ng_bug.cpp) -target_link_libraries (zlib_ng_bug PRIVATE ch_contrib::zlib) +target_link_libraries (zlib_ng_bug PRIVATE ch_contrib::zlib clickhouse_common_io) clickhouse_add_executable (dragonbox_test dragonbox_test.cpp) -target_link_libraries (dragonbox_test PRIVATE ch_contrib::dragonbox_to_chars) +target_link_libraries (dragonbox_test PRIVATE ch_contrib::dragonbox_to_chars clickhouse_common_io) clickhouse_add_executable (zstd_buffers zstd_buffers.cpp) target_link_libraries (zstd_buffers PRIVATE clickhouse_common_io) diff --git a/src/Interpreters/Access/InterpreterSetRoleQuery.cpp b/src/Interpreters/Access/InterpreterSetRoleQuery.cpp index 24467923542..99a7a73d46c 100644 --- a/src/Interpreters/Access/InterpreterSetRoleQuery.cpp +++ b/src/Interpreters/Access/InterpreterSetRoleQuery.cpp @@ -29,33 +29,12 @@ BlockIO InterpreterSetRoleQuery::execute() void InterpreterSetRoleQuery::setRole(const ASTSetRoleQuery & query) { - auto & access_control = getContext()->getAccessControl(); auto session_context = getContext()->getSessionContext(); - auto user = session_context->getUser(); if (query.kind == ASTSetRoleQuery::Kind::SET_ROLE_DEFAULT) - { session_context->setCurrentRolesDefault(); - } else - { - RolesOrUsersSet roles_from_query{*query.roles, access_control}; - std::vector new_current_roles; - if (roles_from_query.all) - { - new_current_roles = user->granted_roles.findGranted(roles_from_query); - } - else - { - for (const auto & id : roles_from_query.getMatchingIDs()) - { - if (!user->granted_roles.isGranted(id)) - throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role should be granted to set current"); - new_current_roles.emplace_back(id); - } - } - session_context->setCurrentRoles(new_current_roles); - } + session_context->setCurrentRoles(RolesOrUsersSet{*query.roles, session_context->getAccessControl()}); } diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index c3285d73145..a874067e43c 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -405,10 +406,6 @@ Block createBlockForSet( } -ScopeStack::Level::Level() = default; -ScopeStack::Level::~Level() = default; -ScopeStack::Level::Level(Level &&) noexcept = default; - FutureSetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, ContextPtr context, PreparedSets & prepared_sets) { @@ -446,7 +443,7 @@ FutureSetPtr makeExplicitSet( else block = createBlockForSet(left_arg_type, right_arg, set_element_types, context); - return prepared_sets.addFromTuple(set_key, block, context->getSettings()); + return prepared_sets.addFromTuple(set_key, block, context->getSettingsRef()); } class ScopeStack::Index @@ -462,6 +459,7 @@ public: for (const auto * node : index) map.emplace(node->result_name, node); } + ~Index() = default; void addNode(const ActionsDAG::Node * node) { @@ -502,6 +500,10 @@ public: } }; +ScopeStack::Level::Level() = default; +ScopeStack::Level::~Level() = default; +ScopeStack::Level::Level(Level &&) noexcept = default; + ActionsMatcher::Data::Data( ContextPtr context_, SizeLimits set_size_limit_, diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 45b43ae2d3a..e073b7a49b6 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -78,115 +77,6 @@ namespace ErrorCodes namespace { -/** Collects observed HashMap-s sizes to avoid redundant intermediate resizes. - */ -class HashTablesStatistics -{ -public: - struct Entry - { - size_t sum_of_sizes; // used to determine if it's better to convert aggregation to two-level from the beginning - size_t median_size; // roughly the size we're going to preallocate on each thread - }; - - using Cache = DB::CacheBase; - using CachePtr = std::shared_ptr; - using Params = DB::Aggregator::Params::StatsCollectingParams; - - /// Collection and use of the statistics should be enabled. - std::optional getSizeHint(const Params & params) - { - if (!params.isCollectionAndUseEnabled()) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); - - std::lock_guard lock(mutex); - const auto cache = getHashTableStatsCache(params, lock); - if (const auto hint = cache->get(params.key)) - { - LOG_TRACE( - getLogger("Aggregator"), - "An entry for key={} found in cache: sum_of_sizes={}, median_size={}", - params.key, - hint->sum_of_sizes, - hint->median_size); - return *hint; - } - return std::nullopt; - } - - /// Collection and use of the statistics should be enabled. - void update(size_t sum_of_sizes, size_t median_size, const Params & params) - { - if (!params.isCollectionAndUseEnabled()) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); - - std::lock_guard lock(mutex); - const auto cache = getHashTableStatsCache(params, lock); - const auto hint = cache->get(params.key); - // We'll maintain the maximum among all the observed values until the next prediction turns out to be too wrong. - if (!hint || sum_of_sizes < hint->sum_of_sizes / 2 || hint->sum_of_sizes < sum_of_sizes || median_size < hint->median_size / 2 - || hint->median_size < median_size) - { - LOG_TRACE( - getLogger("Aggregator"), - "Statistics updated for key={}: new sum_of_sizes={}, median_size={}", - params.key, - sum_of_sizes, - median_size); - cache->set(params.key, std::make_shared(Entry{.sum_of_sizes = sum_of_sizes, .median_size = median_size})); - } - } - - std::optional getCacheStats() const - { - std::lock_guard lock(mutex); - if (hash_table_stats) - { - size_t hits = 0, misses = 0; - hash_table_stats->getStats(hits, misses); - return DB::HashTablesCacheStatistics{.entries = hash_table_stats->count(), .hits = hits, .misses = misses}; - } - return std::nullopt; - } - - static size_t calculateCacheKey(const DB::ASTPtr & select_query) - { - if (!select_query) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Query ptr cannot be null"); - - const auto & select = select_query->as(); - - // It may happen in some corner cases like `select 1 as num group by num`. - if (!select.tables()) - return 0; - - SipHash hash; - hash.update(select.tables()->getTreeHash(/*ignore_aliases=*/ true)); - if (const auto where = select.where()) - hash.update(where->getTreeHash(/*ignore_aliases=*/ true)); - if (const auto group_by = select.groupBy()) - hash.update(group_by->getTreeHash(/*ignore_aliases=*/ true)); - return hash.get64(); - } - -private: - CachePtr getHashTableStatsCache(const Params & params, const std::lock_guard &) - { - if (!hash_table_stats || hash_table_stats->maxSizeInBytes() != params.max_entries_for_hash_table_stats) - hash_table_stats = std::make_shared(params.max_entries_for_hash_table_stats); - return hash_table_stats; - } - - mutable std::mutex mutex; - CachePtr hash_table_stats; -}; - -HashTablesStatistics & getHashTablesStatistics() -{ - static HashTablesStatistics hash_tables_stats; - return hash_tables_stats; -} - bool worthConvertToTwoLevel( size_t group_by_two_level_threshold, size_t result_size, size_t group_by_two_level_threshold_bytes, auto result_size_bytes) { @@ -215,49 +105,29 @@ void initDataVariantsWithSizeHint( DB::AggregatedDataVariants & result, DB::AggregatedDataVariants::Type method_chosen, const DB::Aggregator::Params & params) { const auto & stats_collecting_params = params.stats_collecting_params; - if (stats_collecting_params.isCollectionAndUseEnabled()) + const auto max_threads = params.group_by_two_level_threshold != 0 ? std::max(params.max_threads, 1ul) : 1; + if (auto hint = getSizeHint(stats_collecting_params, /*tables_cnt=*/max_threads)) { - if (auto hint = getHashTablesStatistics().getSizeHint(stats_collecting_params)) - { - const auto max_threads = params.group_by_two_level_threshold != 0 ? std::max(params.max_threads, 1ul) : 1; - const auto lower_limit = hint->sum_of_sizes / max_threads; - const auto upper_limit = stats_collecting_params.max_size_to_preallocate_for_aggregation / max_threads; - if (hint->median_size > upper_limit) - { - /// Since we cannot afford to preallocate as much as we want, we will likely need to do resize anyway. - /// But we will also work with the big (i.e. not so cache friendly) HT from the beginning which may result in a slight slowdown. - /// So let's just do nothing. - LOG_TRACE( - getLogger("Aggregator"), - "No space were preallocated in hash tables because 'max_size_to_preallocate_for_aggregation' has too small value: {}, " - "should be at least {}", - stats_collecting_params.max_size_to_preallocate_for_aggregation, - hint->median_size * max_threads); - } - /// https://github.com/ClickHouse/ClickHouse/issues/44402#issuecomment-1359920703 - else if ((max_threads > 1 && hint->sum_of_sizes > 100'000) || hint->sum_of_sizes > 500'000) - { - const auto adjusted = std::max(lower_limit, hint->median_size); - if (worthConvertToTwoLevel( - params.group_by_two_level_threshold, - hint->sum_of_sizes, - /*group_by_two_level_threshold_bytes*/ 0, - /*result_size_bytes*/ 0)) - method_chosen = convertToTwoLevelTypeIfPossible(method_chosen); - result.init(method_chosen, adjusted); - ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel()); - return; - } - } + if (worthConvertToTwoLevel( + params.group_by_two_level_threshold, + hint->sum_of_sizes, + /*group_by_two_level_threshold_bytes*/ 0, + /*result_size_bytes*/ 0)) + method_chosen = convertToTwoLevelTypeIfPossible(method_chosen); + result.init(method_chosen, hint->median_size); } - result.init(method_chosen); + else + { + result.init(method_chosen); + } + ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel()); } /// Collection and use of the statistics should be enabled. -void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, const DB::Aggregator::Params::StatsCollectingParams & params) +void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, const DB::StatsCollectingParams & params) { if (!params.isCollectionAndUseEnabled()) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); + return; std::vector sizes(data_variants.size()); for (size_t i = 0; i < data_variants.size(); ++i) @@ -265,7 +135,7 @@ void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, cons const auto median_size = sizes.begin() + sizes.size() / 2; // not precisely though... std::nth_element(sizes.begin(), median_size, sizes.end()); const auto sum_of_sizes = std::accumulate(sizes.begin(), sizes.end(), 0ull); - getHashTablesStatistics().update(sum_of_sizes, *median_size, params); + DB::getHashTablesStatistics().update(sum_of_sizes, *median_size, params); } DB::ColumnNumbers calculateKeysPositions(const DB::Block & header, const DB::Aggregator::Params & params) @@ -300,24 +170,6 @@ size_t getMinBytesForPrefetch() namespace DB { -std::optional getHashTablesCacheStatistics() -{ - return getHashTablesStatistics().getCacheStats(); -} - -Aggregator::Params::StatsCollectingParams::StatsCollectingParams() = default; - -Aggregator::Params::StatsCollectingParams::StatsCollectingParams( - const ASTPtr & select_query_, - bool collect_hash_table_stats_during_aggregation_, - size_t max_entries_for_hash_table_stats_, - size_t max_size_to_preallocate_for_aggregation_) - : key(collect_hash_table_stats_during_aggregation_ ? HashTablesStatistics::calculateCacheKey(select_query_) : 0) - , max_entries_for_hash_table_stats(max_entries_for_hash_table_stats_) - , max_size_to_preallocate_for_aggregation(max_size_to_preallocate_for_aggregation_) -{ -} - Block Aggregator::getHeader(bool final) const { return params.getHeader(header, final); @@ -2783,8 +2635,7 @@ ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedData LOG_TRACE(log, "Merging aggregated data"); - if (params.stats_collecting_params.isCollectionAndUseEnabled()) - updateStatistics(data_variants, params.stats_collecting_params); + updateStatistics(data_variants, params.stats_collecting_params); ManyAggregatedDataVariants non_empty_data; non_empty_data.reserve(data_variants.size()); @@ -3486,4 +3337,23 @@ void Aggregator::destroyAllAggregateStates(AggregatedDataVariants & result) cons throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); } +UInt64 calculateCacheKey(const DB::ASTPtr & select_query) +{ + if (!select_query) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Query ptr cannot be null"); + + const auto & select = select_query->as(); + + // It may happen in some corner cases like `select 1 as num group by num`. + if (!select.tables()) + return 0; + + SipHash hash; + hash.update(select.tables()->getTreeHash(/*ignore_aliases=*/true)); + if (const auto where = select.where()) + hash.update(where->getTreeHash(/*ignore_aliases=*/true)); + if (const auto group_by = select.groupBy()) + hash.update(group_by->getTreeHash(/*ignore_aliases=*/true)); + return hash.get64(); +} } diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 406d28597cf..f4f1e9a1df3 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -39,9 +39,10 @@ #include -#include #include #include +#include +#include namespace DB { @@ -128,24 +129,6 @@ public: const double min_hit_rate_to_use_consecutive_keys_optimization; - struct StatsCollectingParams - { - StatsCollectingParams(); - - StatsCollectingParams( - const ASTPtr & select_query_, - bool collect_hash_table_stats_during_aggregation_, - size_t max_entries_for_hash_table_stats_, - size_t max_size_to_preallocate_for_aggregation_); - - bool isCollectionAndUseEnabled() const { return key != 0; } - void disable() { key = 0; } - - UInt64 key = 0; - const size_t max_entries_for_hash_table_stats = 0; - const size_t max_size_to_preallocate_for_aggregation = 0; - }; - StatsCollectingParams stats_collecting_params; Params( @@ -674,6 +657,7 @@ private: Arena * arena); }; +UInt64 calculateCacheKey(const DB::ASTPtr & select_query); /** Get the aggregation variant by its type. */ template Method & getDataVariant(AggregatedDataVariants & variants); @@ -685,13 +669,4 @@ APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M - -struct HashTablesCacheStatistics -{ - size_t entries = 0; - size_t hits = 0; - size_t misses = 0; -}; - -std::optional getHashTablesCacheStatistics(); } diff --git a/src/Interpreters/ArrayJoinAction.cpp b/src/Interpreters/ArrayJoinAction.cpp index 54ff53764e7..df7a0b48057 100644 --- a/src/Interpreters/ArrayJoinAction.cpp +++ b/src/Interpreters/ArrayJoinAction.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index d72f3d81549..56055e7044c 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -726,7 +726,10 @@ try /// Access rights must be checked for the user who executed the initial INSERT query. if (key.user_id) - insert_context->setUser(*key.user_id, key.current_roles); + { + insert_context->setUser(*key.user_id); + insert_context->setCurrentRoles(key.current_roles); + } insert_context->setSettings(key.settings); diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 0d33e39ffa3..a3848fa3a75 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -30,6 +30,7 @@ namespace ProfileEvents extern const Event FilesystemCacheFailToReserveSpaceBecauseOfLockContention; extern const Event FilesystemCacheFreeSpaceKeepingThreadRun; extern const Event FilesystemCacheFreeSpaceKeepingThreadWorkMilliseconds; + extern const Event FilesystemCacheFailToReserveSpaceBecauseOfCacheResize; } namespace DB @@ -813,7 +814,7 @@ bool FileCache::tryReserve( /// ok compared to the number of cases this check will help. if (cache_is_being_resized.load(std::memory_order_relaxed)) { - ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfLockContention); + ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfCacheResize); return false; } diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index a5c8f3c0cf4..0e85ead3265 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -1,11 +1,16 @@ #pragma once + + #include +#include #include #include #include #include #include #include + +#include #include namespace DB @@ -30,7 +35,7 @@ struct FileSegmentMetadata : private boost::noncopyable explicit FileSegmentMetadata(FileSegmentPtr && file_segment_); - bool releasable() const { return file_segment.unique(); } + bool releasable() const { return isSharedPtrUnique(file_segment); } size_t size() const; diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index e654d091561..e6ebf6ad50c 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include @@ -12,6 +12,8 @@ #include #include +#include + namespace DB { @@ -134,7 +136,7 @@ std::unique_ptr WriteBufferToFileSegment::getReadBufferImpl() if (file_segment->getDownloadedSize() > 0) return std::make_unique(file_segment->getPath()); else - return std::make_unique(); + return std::make_unique(); } } diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 94a02eacdaa..e35d31d2350 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 91c0c592f28..5d56ef09127 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -8,23 +8,28 @@ #include #include #include +#include #include -#include -#include #include +#include +#include +#include +#include +#include #include +#include #include #include -#include #include +#include #include +#include #include #include #include -#include +#include #include -#include -#include +#include namespace DB { @@ -33,7 +38,6 @@ namespace ErrorCodes { extern const int TOO_LARGE_DISTRIBUTED_DEPTH; extern const int LOGICAL_ERROR; - extern const int CLUSTER_DOESNT_EXIST; extern const int UNEXPECTED_CLUSTER; } @@ -172,7 +176,7 @@ ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster, /// in case of parallel replicas custom key use round robing load balancing /// so custom key partitions will be spread over nodes in round-robin fashion - if (context->canUseParallelReplicasCustomKey(cluster) && !settings.load_balancing.changed) + if (context->canUseParallelReplicasCustomKeyForCluster(cluster) && !settings.load_balancing.changed) { new_settings.load_balancing = LoadBalancing::ROUND_ROBIN; } @@ -180,6 +184,10 @@ ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster, auto new_context = Context::createCopy(context); new_context->setSettings(new_settings); new_context->setClientInfo(new_client_info); + + if (context->canUseParallelReplicasCustomKeyForCluster(cluster)) + new_context->disableOffsetParallelReplicas(); + return new_context; } @@ -220,6 +228,35 @@ static ThrottlerPtr getThrottler(const ContextPtr & context) return throttler; } +AdditionalShardFilterGenerator +getShardFilterGeneratorForCustomKey(const Cluster & cluster, ContextPtr context, const ColumnsDescription & columns) +{ + if (!context->canUseParallelReplicasCustomKeyForCluster(cluster)) + return {}; + + const auto & settings = context->getSettingsRef(); + auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *context); + if (custom_key_ast == nullptr) + return {}; + + return [my_custom_key_ast = std::move(custom_key_ast), + column_description = columns, + custom_key_type = settings.parallel_replicas_custom_key_filter_type.value, + custom_key_range_lower = settings.parallel_replicas_custom_key_range_lower.value, + custom_key_range_upper = settings.parallel_replicas_custom_key_range_upper.value, + query_context = context, + replica_count = cluster.getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr + { + return getCustomKeyFilterForParallelReplica( + replica_count, + replica_num - 1, + my_custom_key_ast, + {custom_key_type, custom_key_range_lower, custom_key_range_upper}, + column_description, + query_context); + }; +} + void executeQuery( QueryPlan & query_plan, @@ -412,14 +449,7 @@ void executeQueryWithParallelReplicas( const auto & settings = context->getSettingsRef(); /// check cluster for parallel replicas - if (settings.cluster_for_parallel_replicas.value.empty()) - { - throw Exception( - ErrorCodes::CLUSTER_DOESNT_EXIST, - "Reading in parallel from replicas is enabled but cluster to execute query is not provided. Please set " - "'cluster_for_parallel_replicas' setting"); - } - auto not_optimized_cluster = context->getCluster(settings.cluster_for_parallel_replicas); + auto not_optimized_cluster = context->getClusterForParallelReplicas(); auto new_context = Context::createCopy(context); @@ -542,6 +572,84 @@ void executeQueryWithParallelReplicas( executeQueryWithParallelReplicas(query_plan, storage_id, header, processed_stage, modified_query_ast, context, storage_limits); } +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const Block & header, + ContextPtr context) +{ + /// Return directly (with correct header) if no shard to query. + if (query_info.getCluster()->getShardsInfo().empty()) + { + if (context->getSettingsRef().allow_experimental_analyzer) + return; + + Pipe pipe(std::make_shared(header)); + auto read_from_pipe = std::make_unique(std::move(pipe)); + read_from_pipe->setStepDescription("Read from NullSource (Distributed)"); + query_plan.addStep(std::move(read_from_pipe)); + return; + } + + ColumnsDescriptionByShardNum columns_object; + if (hasDynamicSubcolumns(columns)) + columns_object = getExtendedObjectsOfRemoteTables(*query_info.cluster, storage_id, columns, context); + + ClusterProxy::SelectStreamFactory select_stream_factory + = ClusterProxy::SelectStreamFactory(header, columns_object, snapshot, processed_stage); + + auto shard_filter_generator = getShardFilterGeneratorForCustomKey(*query_info.getCluster(), context, columns); + + ClusterProxy::executeQuery( + query_plan, + header, + processed_stage, + storage_id, + /*table_func_ptr=*/nullptr, + select_stream_factory, + getLogger("executeQueryWithParallelReplicasCustomKey"), + context, + query_info, + /*sharding_key_expr=*/nullptr, + /*sharding_key_column_name=*/{}, + /*distributed_settings=*/{}, + shard_filter_generator, + /*is_remote_function=*/false); +} + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const QueryTreeNodePtr & query_tree, + ContextPtr context) +{ + auto header = InterpreterSelectQueryAnalyzer::getSampleBlock(query_tree, context, SelectQueryOptions(processed_stage).analyze()); + executeQueryWithParallelReplicasCustomKey(query_plan, storage_id, query_info, columns, snapshot, processed_stage, header, context); +} + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + SelectQueryInfo query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const ASTPtr & query_ast, + ContextPtr context) +{ + auto header = InterpreterSelectQuery(query_ast, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + query_info.query = ClusterProxy::rewriteSelectQuery( + context, query_info.query, storage_id.getDatabaseName(), storage_id.getTableName(), /*table_function_ptr=*/nullptr); + executeQueryWithParallelReplicasCustomKey(query_plan, storage_id, query_info, columns, snapshot, processed_stage, header, context); +} } } diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 6548edf8939..c22fcd24f03 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -1,7 +1,7 @@ #pragma once -#include #include +#include #include namespace DB @@ -13,6 +13,11 @@ class Cluster; using ClusterPtr = std::shared_ptr; struct SelectQueryInfo; +class ColumnsDescription; +struct StorageSnapshot; + +using StorageSnapshotPtr = std::shared_ptr; + class Pipe; class QueryPlan; @@ -47,6 +52,9 @@ class SelectStreamFactory; ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table); using AdditionalShardFilterGenerator = std::function; +AdditionalShardFilterGenerator +getShardFilterGeneratorForCustomKey(const Cluster & cluster, ContextPtr context, const ColumnsDescription & columns); + /// Execute a distributed query, creating a query plan, from which the query pipeline can be built. /// `stream_factory` object encapsulates the logic of creating plans for a different type of query /// (currently SELECT, DESCRIBE). @@ -91,6 +99,36 @@ void executeQueryWithParallelReplicas( const PlannerContextPtr & planner_context, ContextPtr context, std::shared_ptr storage_limits); + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const Block & header, + ContextPtr context); + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const QueryTreeNodePtr & query_tree, + ContextPtr context); + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + SelectQueryInfo query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const ASTPtr & query_ast, + ContextPtr context); } } diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 53987694e46..4493a9f4dbd 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -16,12 +17,18 @@ #include #include #include +#include #include #include #include #include #include +namespace ProfileEvents +{ +extern const Event HashJoinPreallocatedElementsInHashTables; +} + namespace CurrentMetrics { extern const Metric ConcurrentHashJoinPoolThreads; @@ -29,6 +36,25 @@ extern const Metric ConcurrentHashJoinPoolThreadsActive; extern const Metric ConcurrentHashJoinPoolThreadsScheduled; } +namespace +{ + +void updateStatistics(const auto & hash_joins, const DB::StatsCollectingParams & params) +{ + if (!params.isCollectionAndUseEnabled()) + return; + + std::vector sizes(hash_joins.size()); + for (size_t i = 0; i < hash_joins.size(); ++i) + sizes[i] = hash_joins[i]->data->getTotalRowCount(); + const auto median_size = sizes.begin() + sizes.size() / 2; // not precisely though... + std::nth_element(sizes.begin(), median_size, sizes.end()); + if (auto sum_of_sizes = std::accumulate(sizes.begin(), sizes.end(), 0ull)) + DB::getHashTablesStatistics().update(sum_of_sizes, *median_size, params); +} + +} + namespace DB { @@ -46,7 +72,12 @@ static UInt32 toPowerOfTwo(UInt32 x) } ConcurrentHashJoin::ConcurrentHashJoin( - ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_) + ContextPtr context_, + std::shared_ptr table_join_, + size_t slots_, + const Block & right_sample_block, + const StatsCollectingParams & stats_collecting_params_, + bool any_take_last_row_) : context(context_) , table_join(table_join_) , slots(toPowerOfTwo(std::min(static_cast(slots_), 256))) @@ -55,6 +86,7 @@ ConcurrentHashJoin::ConcurrentHashJoin( CurrentMetrics::ConcurrentHashJoinPoolThreadsActive, CurrentMetrics::ConcurrentHashJoinPoolThreadsScheduled, slots)) + , stats_collecting_params(stats_collecting_params_) { hash_joins.resize(slots); @@ -74,9 +106,14 @@ ConcurrentHashJoin::ConcurrentHashJoin( CurrentThread::attachToGroupIfDetached(thread_group); setThreadName("ConcurrentJoin"); + size_t reserve_size = 0; + if (auto hint = getSizeHint(stats_collecting_params, slots)) + reserve_size = hint->median_size; + ProfileEvents::increment(ProfileEvents::HashJoinPreallocatedElementsInHashTables, reserve_size); + auto inner_hash_join = std::make_shared(); inner_hash_join->data = std::make_unique( - table_join_, right_sample_block, any_take_last_row_, 0, fmt::format("concurrent{}", idx)); + table_join_, right_sample_block, any_take_last_row_, reserve_size, fmt::format("concurrent{}", idx)); /// Non zero `max_joined_block_rows` allows to process block partially and return not processed part. /// TODO: It's not handled properly in ConcurrentHashJoin case, so we set it to 0 to disable this feature. inner_hash_join->data->setMaxJoinedBlockRows(0); @@ -97,6 +134,8 @@ ConcurrentHashJoin::~ConcurrentHashJoin() { try { + updateStatistics(hash_joins, stats_collecting_params); + for (size_t i = 0; i < slots; ++i) { // Hash tables destruction may be very time-consuming. @@ -300,4 +339,16 @@ Blocks ConcurrentHashJoin::dispatchBlock(const Strings & key_columns_names, cons return result; } +UInt64 calculateCacheKey(std::shared_ptr & table_join, const QueryTreeNodePtr & right_table_expression) +{ + IQueryTreeNode::HashState hash; + chassert(right_table_expression); + hash.update(right_table_expression->getTreeHash()); + chassert(table_join && table_join->oneDisjunct()); + const auto keys + = NameOrderedSet{table_join->getClauses().at(0).key_names_right.begin(), table_join->getClauses().at(0).key_names_right.end()}; + for (const auto & name : keys) + hash.update(name); + return hash.get64(); +} } diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index efac648214e..a911edaccc3 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -3,8 +3,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -38,6 +40,7 @@ public: std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, + const StatsCollectingParams & stats_collecting_params_, bool any_take_last_row_ = false); ~ConcurrentHashJoin() override; @@ -70,6 +73,8 @@ private: std::unique_ptr pool; std::vector> hash_joins; + StatsCollectingParams stats_collecting_params; + std::mutex totals_mutex; Block totals; @@ -77,4 +82,5 @@ private: Blocks dispatchBlock(const Strings & key_columns_names, const Block & from_block); }; +UInt64 calculateCacheKey(std::shared_ptr & table_join, const QueryTreeNodePtr & right_table_expression); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index b946c2cb21e..fc1e87e7b7e 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -190,6 +192,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; extern const int CLUSTER_DOESNT_EXIST; + extern const int SET_NON_GRANTED_ROLE; } #define SHUTDOWN(log, desc, ptr, method) do \ @@ -822,8 +825,69 @@ void ContextSharedMutex::lockSharedImpl() ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); } -ContextData::ContextData() = default; -ContextData::ContextData(const ContextData &) = default; +ContextData::ContextData() +{ + settings = std::make_unique(); +} +ContextData::ContextData(const ContextData &o) : + shared(o.shared), + client_info(o.client_info), + external_tables_initializer_callback(o.external_tables_initializer_callback), + input_initializer_callback(o.input_initializer_callback), + input_blocks_reader(o.input_blocks_reader), + user_id(o.user_id), + current_roles(o.current_roles), + settings_constraints_and_current_profiles(o.settings_constraints_and_current_profiles), + access(o.access), + need_recalculate_access(o.need_recalculate_access), + current_database(o.current_database), + settings(std::make_unique(*o.settings)), + progress_callback(o.progress_callback), + file_progress_callback(o.file_progress_callback), + process_list_elem(o.process_list_elem), + has_process_list_elem(o.has_process_list_elem), + insertion_table_info(o.insertion_table_info), + is_distributed(o.is_distributed), + default_format(o.default_format), + insert_format(o.insert_format), + external_tables_mapping(o.external_tables_mapping), + scalars(o.scalars), + special_scalars(o.special_scalars), + next_task_callback(o.next_task_callback), + merge_tree_read_task_callback(o.merge_tree_read_task_callback), + merge_tree_all_ranges_callback(o.merge_tree_all_ranges_callback), + parallel_replicas_group_uuid(o.parallel_replicas_group_uuid), + client_protocol_version(o.client_protocol_version), + query_access_info(std::make_shared(*o.query_access_info)), + query_factories_info(o.query_factories_info), + query_privileges_info(o.query_privileges_info), + async_read_counters(o.async_read_counters), + view_source(o.view_source), + table_function_results(o.table_function_results), + query_context(o.query_context), + session_context(o.session_context), + global_context(o.global_context), + buffer_context(o.buffer_context), + is_internal_query(o.is_internal_query), + temp_data_on_disk(o.temp_data_on_disk), + classifier(o.classifier), + prepared_sets_cache(o.prepared_sets_cache), + offset_parallel_replicas_enabled(o.offset_parallel_replicas_enabled), + kitchen_sink(o.kitchen_sink), + part_uuids(o.part_uuids), + ignored_part_uuids(o.ignored_part_uuids), + query_parameters(o.query_parameters), + host_context(o.host_context), + metadata_transaction(o.metadata_transaction), + merge_tree_transaction(o.merge_tree_transaction), + merge_tree_transaction_holder(o.merge_tree_transaction_holder), + remote_read_query_throttler(o.remote_read_query_throttler), + remote_write_query_throttler(o.remote_write_query_throttler), + local_read_query_throttler(o.local_read_query_throttler), + local_write_query_throttler(o.local_write_query_throttler), + backups_query_throttler(o.backups_query_throttler) +{ +} Context::Context() = default; Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this(rhs) {} @@ -863,7 +927,6 @@ ContextMutablePtr Context::createCopy(const ContextPtr & other) { SharedLockGuard lock(other->mutex); auto new_context = std::shared_ptr(new Context(*other)); - new_context->query_access_info = std::make_shared(*other->query_access_info); return new_context; } @@ -969,7 +1032,7 @@ Strings Context::getWarnings() const } /// Make setting's name ordered std::set obsolete_settings; - for (const auto & setting : settings) + for (const auto & setting : *settings) { if (setting.isValueChanged() && setting.isObsolete()) obsolete_settings.emplace(setting.getName()); @@ -1303,7 +1366,7 @@ ConfigurationPtr Context::getUsersConfig() return shared->users_config; } -void Context::setUser(const UUID & user_id_, const std::optional> & current_roles_) +void Context::setUser(const UUID & user_id_) { /// Prepare lists of user's profiles, constraints, settings, roles. /// NOTE: AccessControl::read() and other AccessControl's functions may require some IO work, @@ -1312,8 +1375,8 @@ void Context::setUser(const UUID & user_id_, const std::optional(user_id_); - auto new_current_roles = current_roles_ ? user->granted_roles.findGranted(*current_roles_) : user->granted_roles.findGranted(user->default_roles); - auto enabled_roles = access_control.getEnabledRolesInfo(new_current_roles, {}); + auto default_roles = user->granted_roles.findGranted(user->default_roles); + auto enabled_roles = access_control.getEnabledRolesInfo(default_roles, {}); auto enabled_profiles = access_control.getEnabledSettingsInfo(user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles); const auto & database = user->default_database; @@ -1327,7 +1390,7 @@ void Context::setUser(const UUID & user_id_, const std::optional Context::getUserID() const return user_id; } -void Context::setCurrentRolesWithLock(const std::vector & current_roles_, const std::lock_guard &) +void Context::setCurrentRolesWithLock(const std::vector & new_current_roles, const std::lock_guard &) { - if (current_roles_.empty()) + if (new_current_roles.empty()) current_roles = nullptr; else - current_roles = std::make_shared>(current_roles_); + current_roles = std::make_shared>(new_current_roles); need_recalculate_access = true; } -void Context::setCurrentRoles(const std::vector & current_roles_) +void Context::setCurrentRolesImpl(const std::vector & new_current_roles, bool throw_if_not_granted, bool skip_if_not_granted, const std::shared_ptr & user) { - std::lock_guard lock(mutex); - setCurrentRolesWithLock(current_roles_, lock); + if (skip_if_not_granted) + { + auto filtered_role_ids = user->granted_roles.findGranted(new_current_roles); + std::lock_guard lock{mutex}; + setCurrentRolesWithLock(filtered_role_ids, lock); + return; + } + if (throw_if_not_granted) + { + for (const auto & role_id : new_current_roles) + { + if (!user->granted_roles.isGranted(role_id)) + { + auto role_name = getAccessControl().tryReadName(role_id); + throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role {} should be granted to set as a current", role_name.value_or(toString(role_id))); + } + } + } + std::lock_guard lock2{mutex}; + setCurrentRolesWithLock(new_current_roles, lock2); +} + +void Context::setCurrentRoles(const std::vector & new_current_roles, bool check_grants) +{ + setCurrentRolesImpl(new_current_roles, /* throw_if_not_granted= */ check_grants, /* skip_if_not_granted= */ !check_grants, getUser()); +} + +void Context::setCurrentRoles(const RolesOrUsersSet & new_current_roles, bool check_grants) +{ + if (new_current_roles.all) + { + auto user = getUser(); + setCurrentRolesImpl(user->granted_roles.findGranted(new_current_roles), /* throw_if_not_granted= */ false, /* skip_if_not_granted= */ false, user); + } + else + { + setCurrentRoles(new_current_roles.getMatchingIDs(), check_grants); + } +} + +void Context::setCurrentRoles(const Strings & new_current_roles, bool check_grants) +{ + setCurrentRoles(getAccessControl().getIDs(new_current_roles), check_grants); } void Context::setCurrentRolesDefault() { auto user = getUser(); - setCurrentRoles(user->granted_roles.findGranted(user->default_roles)); + setCurrentRolesImpl(user->granted_roles.findGranted(user->default_roles), /* throw_if_not_granted= */ false, /* skip_if_not_granted= */ false, user); } std::vector Context::getCurrentRoles() const @@ -1443,7 +1547,7 @@ std::shared_ptr Context::getAccess() const /// If setUserID() was never called then this must be the global context with the full access. bool full_access = !user_id; - return ContextAccessParams{user_id, full_access, /* use_default_roles= */ false, current_roles, settings, current_database, client_info}; + return ContextAccessParams{user_id, full_access, /* use_default_roles= */ false, current_roles, *settings, current_database, client_info}; }; /// Check if the current access rights are still valid, otherwise get parameters for recalculating access rights. @@ -1525,7 +1629,7 @@ void Context::setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_i checkSettingsConstraintsWithLock(profiles_info.settings, SettingSource::PROFILE); applySettingsChangesWithLock(profiles_info.settings, lock); settings_constraints_and_current_profiles = profiles_info.getConstraintsAndProfileIDs(settings_constraints_and_current_profiles); - contextSanityClampSettingsWithLock(*this, settings, lock); + contextSanityClampSettingsWithLock(*this, *settings, lock); } void Context::setCurrentProfile(const String & profile_name, bool check_constraints) @@ -2166,15 +2270,15 @@ bool Context::displaySecretsInShowAndSelect() const Settings Context::getSettings() const { SharedLockGuard lock(mutex); - return settings; + return *settings; } void Context::setSettings(const Settings & settings_) { std::lock_guard lock(mutex); - settings = settings_; + *settings = settings_; need_recalculate_access = true; - contextSanityClampSettings(*this, settings); + contextSanityClampSettings(*this, *settings); } void Context::setSettingWithLock(std::string_view name, const String & value, const std::lock_guard & lock) @@ -2184,10 +2288,10 @@ void Context::setSettingWithLock(std::string_view name, const String & value, co setCurrentProfileWithLock(value, true /*check_constraints*/, lock); return; } - settings.set(name, value); + settings->set(name, value); if (ContextAccessParams::dependsOnSettingName(name)) need_recalculate_access = true; - contextSanityClampSettingsWithLock(*this, settings, lock); + contextSanityClampSettingsWithLock(*this, *settings, lock); } void Context::setSettingWithLock(std::string_view name, const Field & value, const std::lock_guard & lock) @@ -2197,7 +2301,7 @@ void Context::setSettingWithLock(std::string_view name, const Field & value, con setCurrentProfileWithLock(value.safeGet(), true /*check_constraints*/, lock); return; } - settings.set(name, value); + settings->set(name, value); if (ContextAccessParams::dependsOnSettingName(name)) need_recalculate_access = true; } @@ -2207,7 +2311,7 @@ void Context::applySettingChangeWithLock(const SettingChange & change, const std try { setSettingWithLock(change.name, change.value, lock); - contextSanityClampSettingsWithLock(*this, settings, lock); + contextSanityClampSettingsWithLock(*this, *settings, lock); } catch (Exception & e) { @@ -2222,7 +2326,7 @@ void Context::applySettingsChangesWithLock(const SettingsChanges & changes, cons { for (const SettingChange & change : changes) applySettingChangeWithLock(change, lock); - applySettingsQuirks(settings); + applySettingsQuirks(*settings); } void Context::setSetting(std::string_view name, const String & value) @@ -2235,7 +2339,13 @@ void Context::setSetting(std::string_view name, const Field & value) { std::lock_guard lock(mutex); setSettingWithLock(name, value, lock); - contextSanityClampSettingsWithLock(*this, settings, lock); + contextSanityClampSettingsWithLock(*this, *settings, lock); +} + +void Context::setServerSetting(std::string_view name, const Field & value) +{ + std::lock_guard lock(mutex); + shared->server_settings.set(name, value); } void Context::applySettingChange(const SettingChange & change) @@ -2262,37 +2372,37 @@ void Context::applySettingsChanges(const SettingsChanges & changes) void Context::checkSettingsConstraintsWithLock(const SettingsProfileElements & profile_elements, SettingSource source) { - getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, profile_elements, source); + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(*settings, profile_elements, source); if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == ApplicationType::SERVER) - doSettingsSanityCheckClamp(settings, getLogger("SettingsSanity")); + doSettingsSanityCheckClamp(*settings, getLogger("SettingsSanity")); } void Context::checkSettingsConstraintsWithLock(const SettingChange & change, SettingSource source) { - getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, change, source); + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(*settings, change, source); if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == ApplicationType::SERVER) - doSettingsSanityCheckClamp(settings, getLogger("SettingsSanity")); + doSettingsSanityCheckClamp(*settings, getLogger("SettingsSanity")); } void Context::checkSettingsConstraintsWithLock(const SettingsChanges & changes, SettingSource source) { - getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, changes, source); + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(*settings, changes, source); if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == ApplicationType::SERVER) - doSettingsSanityCheckClamp(settings, getLogger("SettingsSanity")); + doSettingsSanityCheckClamp(*settings, getLogger("SettingsSanity")); } void Context::checkSettingsConstraintsWithLock(SettingsChanges & changes, SettingSource source) { - getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, changes, source); + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(*settings, changes, source); if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == ApplicationType::SERVER) - doSettingsSanityCheckClamp(settings, getLogger("SettingsSanity")); + doSettingsSanityCheckClamp(*settings, getLogger("SettingsSanity")); } void Context::clampToSettingsConstraintsWithLock(SettingsChanges & changes, SettingSource source) { - getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.clamp(settings, changes, source); + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.clamp(*settings, changes, source); if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == ApplicationType::SERVER) - doSettingsSanityCheckClamp(settings, getLogger("SettingsSanity")); + doSettingsSanityCheckClamp(*settings, getLogger("SettingsSanity")); } void Context::checkMergeTreeSettingsConstraintsWithLock(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const @@ -2315,8 +2425,8 @@ void Context::checkSettingsConstraints(const SettingChange & change, SettingSour void Context::checkSettingsConstraints(const SettingsChanges & changes, SettingSource source) { SharedLockGuard lock(mutex); - getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, changes, source); - doSettingsSanityCheckClamp(settings, getLogger("SettingsSanity")); + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(*settings, changes, source); + doSettingsSanityCheckClamp(*settings, getLogger("SettingsSanity")); } void Context::checkSettingsConstraints(SettingsChanges & changes, SettingSource source) @@ -2341,7 +2451,7 @@ void Context::resetSettingsToDefaultValue(const std::vector & names) { std::lock_guard lock(mutex); for (const String & name: names) - settings.setDefaultValue(name); + settings->setDefaultValue(name); } std::shared_ptr Context::getSettingsConstraintsAndCurrentProfilesWithLock() const @@ -2561,14 +2671,14 @@ void Context::makeQueryContextForMerge(const MergeTreeSettings & merge_tree_sett { makeQueryContext(); classifier.reset(); // It is assumed that there are no active queries running using this classifier, otherwise this will lead to crashes - settings.workload = merge_tree_settings.merge_workload.value.empty() ? getMergeWorkload() : merge_tree_settings.merge_workload; + settings->workload = merge_tree_settings.merge_workload.value.empty() ? getMergeWorkload() : merge_tree_settings.merge_workload; } void Context::makeQueryContextForMutate(const MergeTreeSettings & merge_tree_settings) { makeQueryContext(); classifier.reset(); // It is assumed that there are no active queries running using this classifier, otherwise this will lead to crashes - settings.workload = merge_tree_settings.mutation_workload.value.empty() ? getMutationWorkload() : merge_tree_settings.mutation_workload; + settings->workload = merge_tree_settings.mutation_workload.value.empty() ? getMutationWorkload() : merge_tree_settings.mutation_workload; } void Context::makeSessionContext() @@ -3851,7 +3961,7 @@ void Context::reloadClusterConfig() const } const auto & config = cluster_config ? *cluster_config : getConfigRef(); - auto new_clusters = std::make_shared(config, settings, getMacros()); + auto new_clusters = std::make_shared(config, *settings, getMacros()); { std::lock_guard lock(shared->clusters_mutex); @@ -3886,7 +3996,7 @@ std::shared_ptr Context::getClustersImpl(std::lock_guard & if (!shared->clusters) { const auto & config = shared->clusters_config ? *shared->clusters_config : getConfigRef(); - shared->clusters = std::make_shared(config, settings, getMacros()); + shared->clusters = std::make_shared(config, *settings, getMacros()); } return shared->clusters; @@ -3918,9 +4028,9 @@ void Context::setClustersConfig(const ConfigurationPtr & config, bool enable_dis shared->clusters_config = config; if (!shared->clusters) - shared->clusters = std::make_shared(*shared->clusters_config, settings, getMacros(), config_name); + shared->clusters = std::make_shared(*shared->clusters_config, *settings, getMacros(), config_name); else - shared->clusters->updateClusters(*shared->clusters_config, settings, config_name, old_clusters_config); + shared->clusters->updateClusters(*shared->clusters_config, *settings, config_name, old_clusters_config); ++shared->clusters_version; } @@ -4002,7 +4112,6 @@ std::shared_ptr Context::getQueryThreadLog() const std::shared_ptr Context::getQueryViewsLog() const { SharedLockGuard lock(shared->mutex); - if (!shared->system_logs) return {}; @@ -4182,7 +4291,7 @@ std::shared_ptr Context::getBackupLog() const std::shared_ptr Context::getBlobStorageLog() const { - bool enable_blob_storage_log = settings.enable_blob_storage_log; + bool enable_blob_storage_log = settings->enable_blob_storage_log; if (hasQueryContext()) enable_blob_storage_log = getQueryContext()->getSettingsRef().enable_blob_storage_log; @@ -4623,8 +4732,8 @@ void Context::setDefaultProfiles(const Poco::Util::AbstractConfiguration & confi shared->system_profile_name = config.getString("system_profile", shared->default_profile_name); setCurrentProfile(shared->system_profile_name); - applySettingsQuirks(settings, getLogger("SettingsQuirks")); - doSettingsSanityCheckClamp(settings, getLogger("SettingsSanity")); + applySettingsQuirks(*settings, getLogger("SettingsQuirks")); + doSettingsSanityCheckClamp(*settings, getLogger("SettingsSanity")); shared->buffer_profile_name = config.getString("buffer_profile", shared->system_profile_name); buffer_context = Context::createCopy(shared_from_this()); @@ -5193,11 +5302,11 @@ AsynchronousInsertQueue * Context::tryGetAsynchronousInsertQueue() const void Context::setAsynchronousInsertQueue(const std::shared_ptr & ptr) { - AsynchronousInsertQueue::validateSettings(settings, getLogger("Context")); + AsynchronousInsertQueue::validateSettings(*settings, getLogger("Context")); SharedLockGuard lock(shared->mutex); - if (std::chrono::milliseconds(settings.async_insert_poll_timeout_ms) == std::chrono::milliseconds::zero()) + if (std::chrono::milliseconds(settings->async_insert_poll_timeout_ms) == std::chrono::milliseconds::zero()) throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting async_insert_poll_timeout_ms can't be zero"); shared->async_insert_queue = ptr; @@ -5344,69 +5453,69 @@ ReadSettings Context::getReadSettings() const { ReadSettings res; - std::string_view read_method_str = settings.local_filesystem_read_method.value; + std::string_view read_method_str = settings->local_filesystem_read_method.value; if (auto opt_method = magic_enum::enum_cast(read_method_str)) res.local_fs_method = *opt_method; else throw Exception(ErrorCodes::UNKNOWN_READ_METHOD, "Unknown read method '{}' for local filesystem", read_method_str); - read_method_str = settings.remote_filesystem_read_method.value; + read_method_str = settings->remote_filesystem_read_method.value; if (auto opt_method = magic_enum::enum_cast(read_method_str)) res.remote_fs_method = *opt_method; else throw Exception(ErrorCodes::UNKNOWN_READ_METHOD, "Unknown read method '{}' for remote filesystem", read_method_str); - res.local_fs_prefetch = settings.local_filesystem_read_prefetch; - res.remote_fs_prefetch = settings.remote_filesystem_read_prefetch; + res.local_fs_prefetch = settings->local_filesystem_read_prefetch; + res.remote_fs_prefetch = settings->remote_filesystem_read_prefetch; - res.load_marks_asynchronously = settings.load_marks_asynchronously; + res.load_marks_asynchronously = settings->load_marks_asynchronously; - res.enable_filesystem_read_prefetches_log = settings.enable_filesystem_read_prefetches_log; + res.enable_filesystem_read_prefetches_log = settings->enable_filesystem_read_prefetches_log; - res.remote_fs_read_max_backoff_ms = settings.remote_fs_read_max_backoff_ms; - res.remote_fs_read_backoff_max_tries = settings.remote_fs_read_backoff_max_tries; - res.enable_filesystem_cache = settings.enable_filesystem_cache; - res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; - res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; - res.filesystem_cache_segments_batch_size = settings.filesystem_cache_segments_batch_size; - res.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; + res.remote_fs_read_max_backoff_ms = settings->remote_fs_read_max_backoff_ms; + res.remote_fs_read_backoff_max_tries = settings->remote_fs_read_backoff_max_tries; + res.enable_filesystem_cache = settings->enable_filesystem_cache; + res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = settings->read_from_filesystem_cache_if_exists_otherwise_bypass_cache; + res.enable_filesystem_cache_log = settings->enable_filesystem_cache_log; + res.filesystem_cache_segments_batch_size = settings->filesystem_cache_segments_batch_size; + res.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = settings->filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; - res.filesystem_cache_max_download_size = settings.filesystem_cache_max_download_size; - res.skip_download_if_exceeds_query_cache = settings.skip_download_if_exceeds_query_cache; + res.filesystem_cache_max_download_size = settings->filesystem_cache_max_download_size; + res.skip_download_if_exceeds_query_cache = settings->skip_download_if_exceeds_query_cache; res.page_cache = getPageCache(); - res.use_page_cache_for_disks_without_file_cache = settings.use_page_cache_for_disks_without_file_cache; - res.read_from_page_cache_if_exists_otherwise_bypass_cache = settings.read_from_page_cache_if_exists_otherwise_bypass_cache; - res.page_cache_inject_eviction = settings.page_cache_inject_eviction; + res.use_page_cache_for_disks_without_file_cache = settings->use_page_cache_for_disks_without_file_cache; + res.read_from_page_cache_if_exists_otherwise_bypass_cache = settings->read_from_page_cache_if_exists_otherwise_bypass_cache; + res.page_cache_inject_eviction = settings->page_cache_inject_eviction; - res.remote_read_min_bytes_for_seek = settings.remote_read_min_bytes_for_seek; + res.remote_read_min_bytes_for_seek = settings->remote_read_min_bytes_for_seek; /// Zero read buffer will not make progress. - if (!settings.max_read_buffer_size) + if (!settings->max_read_buffer_size) { throw Exception(ErrorCodes::INVALID_SETTING_VALUE, - "Invalid value '{}' for max_read_buffer_size", settings.max_read_buffer_size); + "Invalid value '{}' for max_read_buffer_size", settings->max_read_buffer_size); } res.local_fs_buffer_size - = settings.max_read_buffer_size_local_fs ? settings.max_read_buffer_size_local_fs : settings.max_read_buffer_size; + = settings->max_read_buffer_size_local_fs ? settings->max_read_buffer_size_local_fs : settings->max_read_buffer_size; res.remote_fs_buffer_size - = settings.max_read_buffer_size_remote_fs ? settings.max_read_buffer_size_remote_fs : settings.max_read_buffer_size; - res.prefetch_buffer_size = settings.prefetch_buffer_size; - res.direct_io_threshold = settings.min_bytes_to_use_direct_io; - res.mmap_threshold = settings.min_bytes_to_use_mmap_io; - res.priority = Priority{settings.read_priority}; + = settings->max_read_buffer_size_remote_fs ? settings->max_read_buffer_size_remote_fs : settings->max_read_buffer_size; + res.prefetch_buffer_size = settings->prefetch_buffer_size; + res.direct_io_threshold = settings->min_bytes_to_use_direct_io; + res.mmap_threshold = settings->min_bytes_to_use_mmap_io; + res.priority = Priority{settings->read_priority}; res.remote_throttler = getRemoteReadThrottler(); res.local_throttler = getLocalReadThrottler(); - res.http_max_tries = settings.http_max_tries; - res.http_retry_initial_backoff_ms = settings.http_retry_initial_backoff_ms; - res.http_retry_max_backoff_ms = settings.http_retry_max_backoff_ms; - res.http_skip_not_found_url_for_globs = settings.http_skip_not_found_url_for_globs; - res.http_make_head_request = settings.http_make_head_request; + res.http_max_tries = settings->http_max_tries; + res.http_retry_initial_backoff_ms = settings->http_retry_initial_backoff_ms; + res.http_retry_max_backoff_ms = settings->http_retry_max_backoff_ms; + res.http_skip_not_found_url_for_globs = settings->http_skip_not_found_url_for_globs; + res.http_make_head_request = settings->http_make_head_request; res.mmap_cache = getMMappedFileCache().get(); @@ -5417,13 +5526,13 @@ WriteSettings Context::getWriteSettings() const { WriteSettings res; - res.enable_filesystem_cache_on_write_operations = settings.enable_filesystem_cache_on_write_operations; - res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; - res.throw_on_error_from_cache = settings.throw_on_error_from_cache_on_write_operations; - res.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; + res.enable_filesystem_cache_on_write_operations = settings->enable_filesystem_cache_on_write_operations; + res.enable_filesystem_cache_log = settings->enable_filesystem_cache_log; + res.throw_on_error_from_cache = settings->throw_on_error_from_cache_on_write_operations; + res.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = settings->filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; - res.s3_allow_parallel_part_upload = settings.s3_allow_parallel_part_upload; - res.azure_allow_parallel_part_upload = settings.azure_allow_parallel_part_upload; + res.s3_allow_parallel_part_upload = settings->s3_allow_parallel_part_upload; + res.azure_allow_parallel_part_upload = settings->azure_allow_parallel_part_upload; res.remote_throttler = getRemoteWriteThrottler(); res.local_throttler = getLocalWriteThrottler(); @@ -5469,10 +5578,37 @@ bool Context::canUseParallelReplicasOnFollower() const return canUseTaskBasedParallelReplicas() && getClientInfo().collaborate_with_initiator; } -bool Context::canUseParallelReplicasCustomKey(const Cluster & cluster) const +bool Context::canUseParallelReplicasCustomKey() const { - return settings.max_parallel_replicas > 1 && getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY - && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; + return settings->max_parallel_replicas > 1 && getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY; +} + +bool Context::canUseParallelReplicasCustomKeyForCluster(const Cluster & cluster) const +{ + return canUseParallelReplicasCustomKey() && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; +} + +bool Context::canUseOffsetParallelReplicas() const +{ + return offset_parallel_replicas_enabled && settings->max_parallel_replicas > 1 + && getParallelReplicasMode() != Context::ParallelReplicasMode::READ_TASKS; +} + +void Context::disableOffsetParallelReplicas() +{ + offset_parallel_replicas_enabled = false; +} + +ClusterPtr Context::getClusterForParallelReplicas() const +{ + /// check cluster for parallel replicas + if (settings->cluster_for_parallel_replicas.value.empty()) + throw Exception( + ErrorCodes::CLUSTER_DOESNT_EXIST, + "Reading in parallel from replicas is enabled but cluster to execute query is not provided. Please set " + "'cluster_for_parallel_replicas' setting"); + + return getCluster(settings->cluster_for_parallel_replicas); } void Context::setPreparedSetsCache(const PreparedSetsCachePtr & cache) @@ -5500,4 +5636,39 @@ const ServerSettings & Context::getServerSettings() const return shared->server_settings; } +uint64_t HTTPContext::getMaxHstsAge() const +{ + return context->getSettingsRef().hsts_max_age; +} + + uint64_t HTTPContext::getMaxUriSize() const +{ + return context->getSettingsRef().http_max_uri_size; +} + +uint64_t HTTPContext::getMaxFields() const +{ +return context->getSettingsRef().http_max_fields; +} + +uint64_t HTTPContext::getMaxFieldNameSize() const +{ +return context->getSettingsRef().http_max_field_name_size; +} + +uint64_t HTTPContext::getMaxFieldValueSize() const +{ +return context->getSettingsRef().http_max_field_value_size; +} + +Poco::Timespan HTTPContext::getReceiveTimeout() const +{ +return context->getSettingsRef().http_receive_timeout; +} + +Poco::Timespan HTTPContext::getSendTimeout() const +{ +return context->getSettingsRef().http_send_timeout; +} + } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index f9b91a45978..284cac50769 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1,7 +1,5 @@ #pragma once -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD - #include #include #include @@ -13,9 +11,11 @@ #include #include #include -#include #include +#include #include +#include +#include #include #include #include @@ -63,6 +63,7 @@ class AccessFlags; struct AccessRightsElement; class AccessRightsElements; enum class RowPolicyFilterType : uint8_t; +struct RolesOrUsersSet; class EmbeddedDictionaries; class ExternalDictionariesLoader; class ExternalUserDefinedExecutableFunctionsLoader; @@ -130,6 +131,7 @@ class ShellCommand; class ICompressionCodec; class AccessControl; class GSSAcceptorContext; +struct Settings; struct SettingsConstraintsAndProfileIDs; class SettingsProfileElements; class RemoteHostFilter; @@ -152,6 +154,8 @@ class AsyncLoader; struct TemporaryTableHolder; using TemporaryTablesMapping = std::map>; +using ClusterPtr = std::shared_ptr; + class LoadTask; using LoadTaskPtr = std::shared_ptr; using LoadTaskPtrs = std::vector; @@ -275,7 +279,7 @@ protected: mutable std::shared_ptr access; mutable bool need_recalculate_access = true; String current_database; - Settings settings; /// Setting for query execution. + std::unique_ptr settings{}; /// Setting for query execution. using ProgressCallback = std::function; ProgressCallback progress_callback; /// Callback for tracking progress of query execution. @@ -459,6 +463,11 @@ protected: /// mutation tasks of one mutation executed against different parts of the same table. PreparedSetsCachePtr prepared_sets_cache; + /// this is a mode of parallel replicas where we set parallel_replicas_count and parallel_replicas_offset + /// and generate specific filters on the replicas (e.g. when using parallel replicas with sample key) + /// if we already use a different mode of parallel replicas we want to disable this mode + bool offset_parallel_replicas_enabled = true; + public: /// Some counters for current query execution. /// Most of them are workarounds and should be removed in the future. @@ -602,13 +611,15 @@ public: /// Sets the current user assuming that he/she is already authenticated. /// WARNING: This function doesn't check password! - void setUser(const UUID & user_id_, const std::optional> & current_roles_ = {}); + void setUser(const UUID & user_id_); UserPtr getUser() const; std::optional getUserID() const; String getUserName() const; - void setCurrentRoles(const std::vector & current_roles_); + void setCurrentRoles(const Strings & new_current_roles, bool check_grants = true); + void setCurrentRoles(const std::vector & new_current_roles, bool check_grants = true); + void setCurrentRoles(const RolesOrUsersSet & new_current_roles, bool check_grants = true); void setCurrentRolesDefault(); std::vector getCurrentRoles() const; std::vector getEnabledRoles() const; @@ -825,6 +836,7 @@ public: /// Set settings by name. void setSetting(std::string_view name, const String & value); void setSetting(std::string_view name, const Field & value); + void setServerSetting(std::string_view name, const Field & value); void applySettingChange(const SettingChange & change); void applySettingsChanges(const SettingsChanges & changes); @@ -943,7 +955,7 @@ public: void makeSessionContext(); void makeGlobalContext(); - const Settings & getSettingsRef() const { return settings; } + const Settings & getSettingsRef() const { return *settings; } void setProgressCallback(ProgressCallback callback); /// Used in executeQuery() to pass it to the QueryPipeline. @@ -1310,7 +1322,13 @@ public: bool canUseTaskBasedParallelReplicas() const; bool canUseParallelReplicasOnInitiator() const; bool canUseParallelReplicasOnFollower() const; - bool canUseParallelReplicasCustomKey(const Cluster & cluster) const; + bool canUseParallelReplicasCustomKey() const; + bool canUseParallelReplicasCustomKeyForCluster(const Cluster & cluster) const; + bool canUseOffsetParallelReplicas() const; + + void disableOffsetParallelReplicas(); + + ClusterPtr getClusterForParallelReplicas() const; enum class ParallelReplicasMode : uint8_t { @@ -1335,7 +1353,7 @@ private: void setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_info, bool check_constraints, const std::lock_guard & lock); - void setCurrentRolesWithLock(const std::vector & current_roles_, const std::lock_guard & lock); + void setCurrentRolesWithLock(const std::vector & new_current_roles, const std::lock_guard & lock); void setSettingWithLock(std::string_view name, const String & value, const std::lock_guard & lock); @@ -1368,6 +1386,7 @@ private: void initGlobal(); void setUserID(const UUID & user_id_); + void setCurrentRolesImpl(const std::vector & new_current_roles, bool throw_if_not_granted, bool skip_if_not_granted, const std::shared_ptr & user); template void checkAccessImpl(const Args &... args) const; @@ -1412,48 +1431,21 @@ struct HTTPContext : public IHTTPContext : context(Context::createCopy(context_)) {} - uint64_t getMaxHstsAge() const override - { - return context->getSettingsRef().hsts_max_age; - } + uint64_t getMaxHstsAge() const override; - uint64_t getMaxUriSize() const override - { - return context->getSettingsRef().http_max_uri_size; - } + uint64_t getMaxUriSize() const override; - uint64_t getMaxFields() const override - { - return context->getSettingsRef().http_max_fields; - } + uint64_t getMaxFields() const override; - uint64_t getMaxFieldNameSize() const override - { - return context->getSettingsRef().http_max_field_name_size; - } + uint64_t getMaxFieldNameSize() const override; - uint64_t getMaxFieldValueSize() const override - { - return context->getSettingsRef().http_max_field_value_size; - } + uint64_t getMaxFieldValueSize() const override; - Poco::Timespan getReceiveTimeout() const override - { - return context->getSettingsRef().http_receive_timeout; - } + Poco::Timespan getReceiveTimeout() const override; - Poco::Timespan getSendTimeout() const override - { - return context->getSettingsRef().http_send_timeout; - } + Poco::Timespan getSendTimeout() const override; ContextPtr context; }; } - -#else - -#include - -#endif diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 6c346836ed8..fa197d59c13 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h index 0b0460b26c8..515a35d8671 100644 --- a/src/Interpreters/DDLTask.h +++ b/src/Interpreters/DDLTask.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 5639eed552e..697fd0f406b 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index aaec94a4fb0..bb2dd158710 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1,5 +1,7 @@ +#include #include #include +#include #include #include #include @@ -26,7 +28,9 @@ #include #include #include +#include +#include #include #include "config.h" @@ -190,6 +194,7 @@ void DatabaseCatalog::initializeAndLoadTemporaryDatabase() unused_dir_rm_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_rm_timeout_sec", unused_dir_rm_timeout_sec); unused_dir_cleanup_period_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_cleanup_period_sec", unused_dir_cleanup_period_sec); drop_error_cooldown_sec = getContext()->getConfigRef().getInt64("database_catalog_drop_error_cooldown_sec", drop_error_cooldown_sec); + drop_table_concurrency = getContext()->getConfigRef().getInt64("database_catalog_drop_table_concurrency", drop_table_concurrency); auto db_for_temporary_and_external_tables = std::make_shared(TEMPORARY_DATABASE, getContext()); attachDatabase(TEMPORARY_DATABASE, db_for_temporary_and_external_tables); @@ -274,10 +279,12 @@ void DatabaseCatalog::shutdownImpl() database->shutdown(); } + TablesMarkedAsDropped tables_marked_dropped_to_destroy; { std::lock_guard lock(tables_marked_dropped_mutex); - tables_marked_dropped.clear(); + tables_marked_dropped.swap(tables_marked_dropped_to_destroy); } + tables_marked_dropped_to_destroy.clear(); std::lock_guard lock(databases_mutex); for (const auto & db : databases) @@ -1141,7 +1148,7 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr (*drop_task)->schedule(); } -void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) +void DatabaseCatalog::undropTable(StorageID table_id) { String latest_metadata_dropped_path; TableMarkedAsDropped dropped_table; @@ -1195,7 +1202,7 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) /// It's unsafe to create another instance while the old one exists /// We cannot wait on shared_ptr's refcount, so it's busy wait - while (!dropped_table.table.unique()) + while (!isSharedPtrUnique(dropped_table.table)) std::this_thread::sleep_for(std::chrono::milliseconds(100)); dropped_table.table.reset(); @@ -1216,91 +1223,162 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) LOG_INFO(log, "Table {} was successfully undropped.", dropped_table.table_id.getNameForLogs()); } +std::tuple DatabaseCatalog::getDroppedTablesCountAndInuseCount() +{ + std::lock_guard lock(tables_marked_dropped_mutex); + + size_t in_use_count = 0; + for (const auto & item : tables_marked_dropped) + { + bool in_use = item.table && !isSharedPtrUnique(item.table); + in_use_count += in_use; + } + return {tables_marked_dropped.size(), in_use_count}; +} + +time_t DatabaseCatalog::getMinDropTime() +{ + time_t min_drop_time = std::numeric_limits::max(); + for (const auto & item : tables_marked_dropped) + { + min_drop_time = std::min(min_drop_time, item.drop_time); + } + return min_drop_time; +} + +std::vector DatabaseCatalog::getTablesToDrop() +{ + time_t current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + decltype(getTablesToDrop()) result; + + std::lock_guard lock(tables_marked_dropped_mutex); + + for (auto it = tables_marked_dropped.begin(); it != tables_marked_dropped.end(); ++it) + { + bool in_use = it->table && !isSharedPtrUnique(it->table); + bool old_enough = it->drop_time <= current_time; + if (!in_use && old_enough) + result.emplace_back(it); + } + + return result; +} + +void DatabaseCatalog::rescheduleDropTableTask() +{ + std::lock_guard lock(tables_marked_dropped_mutex); + + if (tables_marked_dropped.empty()) + return; + + if (first_async_drop_in_queue != tables_marked_dropped.begin()) + { + (*drop_task)->scheduleAfter(0); + return; + } + + time_t current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + auto min_drop_time = getMinDropTime(); + time_t schedule_after_ms = min_drop_time > current_time ? (min_drop_time - current_time) * 1000 : 0; + + (*drop_task)->scheduleAfter(schedule_after_ms); +} + +void DatabaseCatalog::dropTablesParallel(std::vector tables_to_drop) +{ + if (tables_to_drop.empty()) + return; + + ThreadPool pool( + CurrentMetrics::DatabaseCatalogThreads, + CurrentMetrics::DatabaseCatalogThreadsActive, + CurrentMetrics::DatabaseCatalogThreadsScheduled, + /* max_threads */drop_table_concurrency, + /* max_free_threads */0, + /* queue_size */tables_to_drop.size()); + + for (const auto & item : tables_to_drop) + { + auto job = [&, table_iterator = item] () + { + try + { + dropTableFinally(*table_iterator); + + TableMarkedAsDropped table_to_delete_without_lock; + { + std::lock_guard lock(tables_marked_dropped_mutex); + + if (first_async_drop_in_queue == table_iterator) + ++first_async_drop_in_queue; + + [[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(table_iterator->table_id.uuid); + chassert(removed); + + table_to_delete_without_lock = std::move(*table_iterator); + tables_marked_dropped.erase(table_iterator); + + wait_table_finally_dropped.notify_all(); + } + } + catch (...) + { + tryLogCurrentException(log, "Cannot drop table " + table_iterator->table_id.getNameForLogs() + + ". Will retry later."); + { + std::lock_guard lock(tables_marked_dropped_mutex); + + if (first_async_drop_in_queue == table_iterator) + ++first_async_drop_in_queue; + + tables_marked_dropped.splice(tables_marked_dropped.end(), tables_marked_dropped, table_iterator); + table_iterator->drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec; + + if (first_async_drop_in_queue == tables_marked_dropped.end()) + --first_async_drop_in_queue; + } + } + }; + + try + { + pool.scheduleOrThrowOnError(std::move(job)); + } + catch (...) + { + tryLogCurrentException(log, "Cannot drop tables. Will retry later."); + break; + } + } + + try + { + pool.wait(); + } + catch (...) + { + tryLogCurrentException(log, "Cannot drop tables. Will retry later."); + } +} + void DatabaseCatalog::dropTableDataTask() { /// Background task that removes data of tables which were marked as dropped by Atomic databases. /// Table can be removed when it's not used by queries and drop_delay_sec elapsed since it was marked as dropped. - bool need_reschedule = true; - /// Default reschedule time for the case when we are waiting for reference count to become 1. - size_t schedule_after_ms = reschedule_time_ms; - TableMarkedAsDropped table; - try + auto [drop_tables_count, drop_tables_in_use_count] = getDroppedTablesCountAndInuseCount(); + + auto tables_to_drop = getTablesToDrop(); + + if (!tables_to_drop.empty()) { - std::lock_guard lock(tables_marked_dropped_mutex); - if (tables_marked_dropped.empty()) - return; - time_t current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - time_t min_drop_time = std::numeric_limits::max(); - size_t tables_in_use_count = 0; - auto it = std::find_if(tables_marked_dropped.begin(), tables_marked_dropped.end(), [&](const auto & elem) - { - bool not_in_use = !elem.table || elem.table.unique(); - bool old_enough = elem.drop_time <= current_time; - min_drop_time = std::min(min_drop_time, elem.drop_time); - tables_in_use_count += !not_in_use; - return not_in_use && old_enough; - }); - if (it != tables_marked_dropped.end()) - { - table = std::move(*it); - LOG_INFO(log, "Have {} tables in drop queue ({} of them are in use), will try drop {}", - tables_marked_dropped.size(), tables_in_use_count, table.table_id.getNameForLogs()); - if (first_async_drop_in_queue == it) - ++first_async_drop_in_queue; - tables_marked_dropped.erase(it); - /// Schedule the task as soon as possible, while there are suitable tables to drop. - schedule_after_ms = 0; - } - else if (current_time < min_drop_time) - { - /// We are waiting for drop_delay_sec to exceed, no sense to wakeup until min_drop_time. - /// If new table is added to the queue with ignore_delay flag, schedule() is called to wakeup the task earlier. - schedule_after_ms = (min_drop_time - current_time) * 1000; - LOG_TRACE(log, "Not found any suitable tables to drop, still have {} tables in drop queue ({} of them are in use). " - "Will check again after {} seconds", tables_marked_dropped.size(), tables_in_use_count, min_drop_time - current_time); - } - need_reschedule = !tables_marked_dropped.empty(); - } - catch (...) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); + LOG_INFO(log, "Have {} tables in drop queue ({} of them are in use), will try drop {} tables", + drop_tables_count, drop_tables_in_use_count, tables_to_drop.size()); + + dropTablesParallel(tables_to_drop); } - if (table.table_id) - { - try - { - dropTableFinally(table); - std::lock_guard lock(tables_marked_dropped_mutex); - [[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(table.table_id.uuid); - assert(removed); - } - catch (...) - { - tryLogCurrentException(log, "Cannot drop table " + table.table_id.getNameForLogs() + - ". Will retry later."); - { - table.drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec; - std::lock_guard lock(tables_marked_dropped_mutex); - tables_marked_dropped.emplace_back(std::move(table)); - if (first_async_drop_in_queue == tables_marked_dropped.end()) - --first_async_drop_in_queue; - /// If list of dropped tables was empty, schedule a task to retry deletion. - if (tables_marked_dropped.size() == 1) - { - need_reschedule = true; - schedule_after_ms = drop_error_cooldown_sec * 1000; - } - } - } - - wait_table_finally_dropped.notify_all(); - } - - /// Do not schedule a task if there is no tables to drop - if (need_reschedule) - (*drop_task)->scheduleAfter(schedule_after_ms); + rescheduleDropTableTask(); } void DatabaseCatalog::dropTableFinally(const TableMarkedAsDropped & table) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 17d34e96245..23e38a6445e 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -225,7 +225,7 @@ public: String getPathForDroppedMetadata(const StorageID & table_id) const; String getPathForMetadata(const StorageID & table_id) const; void enqueueDroppedTableCleanup(StorageID table_id, StoragePtr table, String dropped_metadata_path, bool ignore_delay = false); - void dequeueDroppedTableCleanup(StorageID table_id); + void undropTable(StorageID table_id); void waitTableFinallyDropped(const UUID & uuid); @@ -296,6 +296,12 @@ private: void dropTableDataTask(); void dropTableFinally(const TableMarkedAsDropped & table); + time_t getMinDropTime() TSA_REQUIRES(tables_marked_dropped_mutex); + std::tuple getDroppedTablesCountAndInuseCount(); + std::vector getTablesToDrop(); + void dropTablesParallel(std::vector tables); + void rescheduleDropTableTask(); + void cleanupStoreDirectoryTask(); bool maybeRemoveDirectory(const String & disk_name, const DiskPtr & disk, const String & unused_dir); @@ -363,6 +369,9 @@ private: static constexpr time_t default_drop_error_cooldown_sec = 5; time_t drop_error_cooldown_sec = default_drop_error_cooldown_sec; + static constexpr size_t default_drop_table_concurrency = 10; + size_t drop_table_concurrency = default_drop_table_concurrency; + std::unique_ptr reload_disks_task; std::mutex reload_disks_mutex; std::set disks_to_reload; diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index a70ff3c6c53..4bd1c47d5a0 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/ExpressionActionsSettings.h b/src/Interpreters/ExpressionActionsSettings.h index 326de983748..384a61a54a2 100644 --- a/src/Interpreters/ExpressionActionsSettings.h +++ b/src/Interpreters/ExpressionActionsSettings.h @@ -1,7 +1,7 @@ #pragma once +#include #include -#include #include diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 8e9ff9ed46c..16d0eb71278 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -986,7 +986,8 @@ static std::shared_ptr tryCreateJoin( const auto & settings = context->getSettings(); if (analyzed_join->allowParallelHashJoin()) - return std::make_shared(context, analyzed_join, settings.max_threads, right_sample_block); + return std::make_shared( + context, analyzed_join, settings.max_threads, right_sample_block, StatsCollectingParams{}); return std::make_shared(analyzed_join, right_sample_block); } diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index 1685c06d387..5f6d7e33927 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include "config.h" diff --git a/src/Interpreters/ExternalLoader.h b/src/Interpreters/ExternalLoader.h index 49b5e68d821..6356a174a01 100644 --- a/src/Interpreters/ExternalLoader.h +++ b/src/Interpreters/ExternalLoader.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index 27c616ff40c..6d92fbd7a43 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -2,7 +2,6 @@ #include #include -#include #include #include #include diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 64b6eb5dce9..986134a6998 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 6970048269b..a241d1cd258 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/src/Interpreters/HashTablesStatistics.cpp b/src/Interpreters/HashTablesStatistics.cpp new file mode 100644 index 00000000000..d66f1bbd1d3 --- /dev/null +++ b/src/Interpreters/HashTablesStatistics.cpp @@ -0,0 +1,117 @@ +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +std::optional HashTablesStatistics::getSizeHint(const Params & params) +{ + if (!params.isCollectionAndUseEnabled()) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); + + std::lock_guard lock(mutex); + const auto cache = getHashTableStatsCache(params, lock); + if (const auto hint = cache->get(params.key)) + { + LOG_TRACE( + getLogger("HashTablesStatistics"), + "An entry for key={} found in cache: sum_of_sizes={}, median_size={}", + params.key, + hint->sum_of_sizes, + hint->median_size); + return *hint; + } + return std::nullopt; +} + +/// Collection and use of the statistics should be enabled. +void HashTablesStatistics::update(size_t sum_of_sizes, size_t median_size, const Params & params) +{ + if (!params.isCollectionAndUseEnabled()) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); + + std::lock_guard lock(mutex); + const auto cache = getHashTableStatsCache(params, lock); + const auto hint = cache->get(params.key); + // We'll maintain the maximum among all the observed values until another prediction is much lower (that should indicate some change) + if (!hint || sum_of_sizes < hint->sum_of_sizes / 2 || hint->sum_of_sizes < sum_of_sizes || median_size < hint->median_size / 2 + || hint->median_size < median_size) + { + LOG_TRACE( + getLogger("HashTablesStatistics"), + "Statistics updated for key={}: new sum_of_sizes={}, median_size={}", + params.key, + sum_of_sizes, + median_size); + cache->set(params.key, std::make_shared(Entry{.sum_of_sizes = sum_of_sizes, .median_size = median_size})); + } +} + +std::optional HashTablesStatistics::getCacheStats() const +{ + std::lock_guard lock(mutex); + if (hash_table_stats) + { + size_t hits = 0, misses = 0; + hash_table_stats->getStats(hits, misses); + return DB::HashTablesCacheStatistics{.entries = hash_table_stats->count(), .hits = hits, .misses = misses}; + } + return std::nullopt; +} + +HashTablesStatistics::CachePtr HashTablesStatistics::getHashTableStatsCache(const Params & params, const std::lock_guard &) +{ + if (!hash_table_stats) + hash_table_stats = std::make_shared(params.max_entries_for_hash_table_stats * sizeof(Entry)); + return hash_table_stats; +} + +HashTablesStatistics & getHashTablesStatistics() +{ + static HashTablesStatistics hash_tables_stats; + return hash_tables_stats; +} + +std::optional getHashTablesCacheStatistics() +{ + return getHashTablesStatistics().getCacheStats(); +} + +std::optional getSizeHint(const DB::StatsCollectingParams & stats_collecting_params, size_t tables_cnt) +{ + if (stats_collecting_params.isCollectionAndUseEnabled()) + { + if (auto hint = DB::getHashTablesStatistics().getSizeHint(stats_collecting_params)) + { + const auto lower_limit = hint->sum_of_sizes / tables_cnt; + const auto upper_limit = stats_collecting_params.max_size_to_preallocate / tables_cnt; + if (hint->median_size > upper_limit) + { + /// Since we cannot afford to preallocate as much as needed, we would likely have to do at least one resize anyway. + /// Though it still sounds better than N resizes, but in actuality we saw that one big resize (remember, HT-s grow exponentially) + /// plus worse cache locality since we're dealing with big HT-s from the beginning yields worse performance. + /// So let's just do nothing. + LOG_TRACE( + getLogger("HashTablesStatistics"), + "No space were preallocated in hash tables because 'max_size_to_preallocate' has too small value: {}, " + "should be at least {}", + stats_collecting_params.max_size_to_preallocate, + hint->median_size * tables_cnt); + } + /// https://github.com/ClickHouse/ClickHouse/issues/44402#issuecomment-1359920703 + else if ((tables_cnt > 1 && hint->sum_of_sizes > 100'000) || hint->sum_of_sizes > 500'000) + { + return HashTablesStatistics::Entry{hint->sum_of_sizes, std::max(lower_limit, hint->median_size)}; + } + } + } + return std::nullopt; +} +} diff --git a/src/Interpreters/HashTablesStatistics.h b/src/Interpreters/HashTablesStatistics.h new file mode 100644 index 00000000000..7b4c4fcbfeb --- /dev/null +++ b/src/Interpreters/HashTablesStatistics.h @@ -0,0 +1,69 @@ +#pragma once + +#include + +namespace DB +{ + +struct HashTablesCacheStatistics +{ + size_t entries = 0; + size_t hits = 0; + size_t misses = 0; +}; + +struct StatsCollectingParams +{ + StatsCollectingParams() = default; + + StatsCollectingParams(UInt64 key_, bool enable_, size_t max_entries_for_hash_table_stats_, size_t max_size_to_preallocate_) + : key(enable_ ? key_ : 0) + , max_entries_for_hash_table_stats(max_entries_for_hash_table_stats_) + , max_size_to_preallocate(max_size_to_preallocate_) + { + } + + bool isCollectionAndUseEnabled() const { return key != 0; } + void disable() { key = 0; } + + UInt64 key = 0; + const size_t max_entries_for_hash_table_stats = 0; + const size_t max_size_to_preallocate = 0; +}; + +/** Collects observed HashTable-s sizes to avoid redundant intermediate resizes. + */ +class HashTablesStatistics +{ +public: + struct Entry + { + size_t sum_of_sizes; // used to determine if it's better to convert aggregation to two-level from the beginning + size_t median_size; // roughly the size we're going to preallocate on each thread + }; + + using Cache = DB::CacheBase; + using CachePtr = std::shared_ptr; + using Params = StatsCollectingParams; + + /// Collection and use of the statistics should be enabled. + std::optional getSizeHint(const Params & params); + + /// Collection and use of the statistics should be enabled. + void update(size_t sum_of_sizes, size_t median_size, const Params & params); + + std::optional getCacheStats() const; + +private: + CachePtr getHashTableStatsCache(const Params & params, const std::lock_guard &); + + mutable std::mutex mutex; + CachePtr hash_table_stats; +}; + +HashTablesStatistics & getHashTablesStatistics(); + +std::optional getHashTablesCacheStatistics(); + +std::optional getSizeHint(const DB::StatsCollectingParams & stats_collecting_params, size_t tables_cnt); +} diff --git a/src/Interpreters/IInterpreter.cpp b/src/Interpreters/IInterpreter.cpp index 148e73e43ce..a291199f24b 100644 --- a/src/Interpreters/IInterpreter.cpp +++ b/src/Interpreters/IInterpreter.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp index fed29b410db..d8f6df05ca4 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp @@ -1,4 +1,6 @@ #include + +#include #include #include #include @@ -15,6 +17,23 @@ namespace DB { +IInterpreterUnionOrSelectQuery::IInterpreterUnionOrSelectQuery(const DB::ASTPtr& query_ptr_, + const DB::ContextMutablePtr& context_, const DB::SelectQueryOptions& options_) + : query_ptr(query_ptr_) + , context(context_) + , options(options_) + , max_streams(context->getSettingsRef().max_threads) +{ + if (options.shard_num) + context->addSpecialScalar( + "_shard_num", + Block{{DataTypeUInt32().createColumnConst(1, *options.shard_num), std::make_shared(), "_shard_num"}}); + if (options.shard_count) + context->addSpecialScalar( + "_shard_count", + Block{{DataTypeUInt32().createColumnConst(1, *options.shard_count), std::make_shared(), "_shard_count"}}); +} + QueryPipelineBuilder IInterpreterUnionOrSelectQuery::buildQueryPipeline() { QueryPlan query_plan; diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.h b/src/Interpreters/IInterpreterUnionOrSelectQuery.h index e4425a73505..ac8e90b5f52 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.h +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.h @@ -17,21 +17,7 @@ public: { } - IInterpreterUnionOrSelectQuery(const ASTPtr & query_ptr_, const ContextMutablePtr & context_, const SelectQueryOptions & options_) - : query_ptr(query_ptr_) - , context(context_) - , options(options_) - , max_streams(context->getSettingsRef().max_threads) - { - if (options.shard_num) - context->addSpecialScalar( - "_shard_num", - Block{{DataTypeUInt32().createColumnConst(1, *options.shard_num), std::make_shared(), "_shard_num"}}); - if (options.shard_count) - context->addSpecialScalar( - "_shard_count", - Block{{DataTypeUInt32().createColumnConst(1, *options.shard_count), std::make_shared(), "_shard_count"}}); - } + IInterpreterUnionOrSelectQuery(const ASTPtr & query_ptr_, const ContextMutablePtr & context_, const SelectQueryOptions & options_); virtual void buildQueryPlan(QueryPlan & query_plan) = 0; QueryPipelineBuilder buildQueryPipeline(); diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index 36972aeb03d..24e594661e9 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -2,6 +2,8 @@ #include +#include + #include #include diff --git a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp index 3b3ef928b42..22ecb6a5db2 100644 --- a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp +++ b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB diff --git a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp index 79a17fd1844..0e83e2039f6 100644 --- a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO InterpreterAlterNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as(); current_context->checkAccess(AccessType::ALTER_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().updateFromSQL(query); diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index c70a3397f4e..8f9c6bc533e 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include #include @@ -24,7 +26,6 @@ #include #include #include -#include #include #include @@ -175,10 +176,9 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) else throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query"); - if (!getContext()->getSettings().allow_experimental_statistics && ( - command_ast->type == ASTAlterCommand::ADD_STATISTICS || - command_ast->type == ASTAlterCommand::DROP_STATISTICS || - command_ast->type == ASTAlterCommand::MATERIALIZE_STATISTICS)) + if (!getContext()->getSettingsRef().allow_experimental_statistics + && (command_ast->type == ASTAlterCommand::ADD_STATISTICS || command_ast->type == ASTAlterCommand::DROP_STATISTICS + || command_ast->type == ASTAlterCommand::MATERIALIZE_STATISTICS)) throw Exception(ErrorCodes::INCORRECT_QUERY, "Alter table with statistics is now disabled. Turn on allow_experimental_statistics"); } diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 4a84a7bf570..5c4fb7a1443 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -12,6 +12,8 @@ #include #include +#include + #include #include diff --git a/src/Interpreters/InterpreterCreateIndexQuery.cpp b/src/Interpreters/InterpreterCreateIndexQuery.cpp index a439cb672c8..1c6abe842d4 100644 --- a/src/Interpreters/InterpreterCreateIndexQuery.cpp +++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp index c71441daa8c..b4920b1729f 100644 --- a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO InterpreterCreateNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as(); current_context->checkAccess(AccessType::CREATE_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().createFromSQL(query); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a990eb651ce..84d7f0a587c 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -5,7 +5,7 @@ #include #include -#include "Common/Exception.h" +#include #include #include #include @@ -14,8 +14,8 @@ #include #include #include +#include #include -#include #include #include @@ -34,10 +34,11 @@ #include #include +#include #include #include -#include #include +#include #include #include @@ -1328,7 +1329,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (need_add_to_database) database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (database && database->getEngineName() == "Replicated" && create.select) + bool allow_heavy_create = getContext()->getSettingsRef().database_replicated_allow_heavy_create; + if (!allow_heavy_create && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; if (create.storage && create.storage->engine) @@ -1338,11 +1340,12 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) is_storage_replicated = true; } - const bool allow_create_select_for_replicated = create.isView() || create.is_create_empty || !is_storage_replicated; + const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated; if (!allow_create_select_for_replicated) throw Exception( ErrorCodes::SUPPORT_IS_DISABLED, - "CREATE AS SELECT is not supported with Replicated databases. Use separate CREATE and INSERT queries"); + "CREATE AS SELECT and POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT queries. " + "Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with caution"); } if (database && database->shouldReplicateQuery(getContext(), query_ptr)) diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 70ef29e6b07..3982ea2cabc 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 9cfb8e486cb..84fe0a9f4ae 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -61,24 +62,7 @@ BlockIO InterpreterDeleteQuery::execute() auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - if (table->supportsDelete()) - { - /// Convert to MutationCommand - MutationCommands mutation_commands; - MutationCommand mut_command; - - mut_command.type = MutationCommand::Type::DELETE; - mut_command.predicate = delete_query.predicate; - - mutation_commands.emplace_back(mut_command); - - table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); - MutationsInterpreter::Settings settings(false); - MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); - table->mutate(mutation_commands, getContext()); - return {}; - } - else if (table->supportsLightweightDelete()) + auto lightweightDelete = [&]() { if (!getContext()->getSettingsRef().enable_lightweight_delete) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, @@ -105,17 +89,77 @@ BlockIO InterpreterDeleteQuery::execute() context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); InterpreterAlterQuery alter_interpreter(alter_ast, context); return alter_interpreter.execute(); + }; + + if (table->supportsDelete()) + { + /// Convert to MutationCommand + MutationCommands mutation_commands; + MutationCommand mut_command; + + mut_command.type = MutationCommand::Type::DELETE; + mut_command.predicate = delete_query.predicate; + + mutation_commands.emplace_back(mut_command); + + table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); + MutationsInterpreter::Settings settings(false); + MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); + table->mutate(mutation_commands, getContext()); + return {}; + } + else if (table->supportsLightweightDelete()) + { + return lightweightDelete(); } else { - /// Currently just better exception for the case of a table with projection, - /// can act differently according to the setting. if (table->hasProjection()) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DELETE query is not supported for table {} as it has projections. " - "User should drop all the projections manually before running the query", - table->getStorageID().getFullTableName()); + auto context = Context::createCopy(getContext()); + auto mode = context->getSettingsRef().lightweight_mutation_projection_mode; + if (mode == LightweightMutationProjectionMode::THROW) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DELETE query is not supported for table {} as it has projections. " + "User should drop all the projections manually before running the query", + table->getStorageID().getFullTableName()); + } + else if (mode == LightweightMutationProjectionMode::DROP) + { + std::vector all_projections = metadata_snapshot->projections.getAllRegisteredNames(); + + context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); + + /// Drop projections first so that lightweight delete can be performed. + for (const auto & projection : all_projections) + { + String alter_query = + "ALTER TABLE " + table->getStorageID().getFullTableName() + + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) + + " DROP PROJECTION IF EXISTS " + projection; + + ParserAlterQuery parser; + ASTPtr alter_ast = parseQuery( + parser, + alter_query.data(), + alter_query.data() + alter_query.size(), + "ALTER query", + 0, + DBMS_DEFAULT_MAX_PARSER_DEPTH, + DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); + + InterpreterAlterQuery alter_interpreter(alter_ast, context); + alter_interpreter.execute(); + } + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Unrecognized lightweight_mutation_projection_mode, only throw and drop are allowed."); + } + + return lightweightDelete(); } throw Exception(ErrorCodes::BAD_ARGUMENTS, diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 87b0eb89302..39fc85a5e23 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterDropIndexQuery.cpp b/src/Interpreters/InterpreterDropIndexQuery.cpp index f052aa201f1..8777532e4d0 100644 --- a/src/Interpreters/InterpreterDropIndexQuery.cpp +++ b/src/Interpreters/InterpreterDropIndexQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp index 2edaef1b2f2..6233d21b439 100644 --- a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO InterpreterDropNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as(); current_context->checkAccess(AccessType::DROP_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().removeFromSQL(query); diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index ba788a2e1f3..b68b3ddcd48 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "config.h" diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 3a06e1b2301..d7addcd6e34 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -29,6 +29,7 @@ #include #include +#include #include #include @@ -43,6 +44,7 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; } namespace @@ -68,7 +70,7 @@ namespace static void visit(ASTSelectQuery & select, ASTPtr & node, Data & data) { /// we need to read statistic when `allow_statistics_optimize` is enabled. - bool only_analyze = !data.getContext()->getSettings().allow_statistics_optimize; + bool only_analyze = !data.getContext()->getSettingsRef().allow_statistics_optimize; InterpreterSelectQuery interpreter( node, data.getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns).analyze(only_analyze).modify()); @@ -170,6 +172,7 @@ struct QueryASTSettings struct QueryTreeSettings { bool run_passes = true; + bool dump_tree = true; bool dump_passes = false; bool dump_ast = false; Int64 passes = -1; @@ -179,6 +182,7 @@ struct QueryTreeSettings std::unordered_map> boolean_settings = { {"run_passes", run_passes}, + {"dump_tree", dump_tree}, {"dump_passes", dump_passes}, {"dump_ast", dump_ast} }; @@ -398,7 +402,11 @@ QueryPipeline InterpreterExplainQuery::executeImpl() throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT is supported for EXPLAIN QUERY TREE query"); auto settings = checkAndGetSettings(ast.getSettings()); + if (!settings.dump_tree && !settings.dump_ast) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Either 'dump_tree' or 'dump_ast' must be set for EXPLAIN QUERY TREE query"); + auto query_tree = buildQueryTree(ast.getExplainedQuery(), getContext()); + bool need_newline = false; if (settings.run_passes) { @@ -410,23 +418,26 @@ QueryPipeline InterpreterExplainQuery::executeImpl() if (settings.dump_passes) { query_tree_pass_manager.dump(buf, pass_index); - if (pass_index > 0) - buf << '\n'; + need_newline = true; } query_tree_pass_manager.run(query_tree, pass_index); + } + + if (settings.dump_tree) + { + if (need_newline) + buf << "\n\n"; query_tree->dumpTree(buf); - } - else - { - query_tree->dumpTree(buf); + need_newline = true; } if (settings.dump_ast) { - buf << '\n'; - buf << '\n'; + if (need_newline) + buf << "\n\n"; + query_tree->toAST()->format(IAST::FormatSettings(buf, false)); } diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 387d056ffe7..12b3b510098 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -60,6 +60,7 @@ #include #include #include +#include namespace ProfileEvents diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index f396db70d21..ef298d4d45a 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 6d6b1085ffb..7eb487ba7b3 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index 32c475d138f..c6e52590f89 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include diff --git a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp index 6eac2db20c9..b0da4caa3ac 100644 --- a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp +++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 90c484636ea..cd91f9532b9 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -84,17 +84,20 @@ #include #include #include +#include +#include #include +#include #include #include #include #include #include +#include +#include #include #include #include -#include -#include namespace ProfileEvents @@ -565,7 +568,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context); ASTPtr parallel_replicas_custom_filter_ast = nullptr; - if (storage && context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY && !joined_tables.tablesWithColumns().empty()) + if (storage && context->canUseParallelReplicasCustomKey() && !joined_tables.tablesWithColumns().empty()) { if (settings.parallel_replicas_count > 1) { @@ -586,16 +589,28 @@ InterpreterSelectQuery::InterpreterSelectQuery( else if (settings.parallel_replica_offset > 0) { throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Parallel replicas processing with custom_key has been requested " - "(setting 'max_parallel_replicas') but the table does not have custom_key defined for it " - "or it's invalid (settings `parallel_replicas_custom_key`)"); + ErrorCodes::BAD_ARGUMENTS, + "Parallel replicas processing with custom_key has been requested " + "(setting 'max_parallel_replicas') but the table does not have custom_key defined for it " + "or it's invalid (settings `parallel_replicas_custom_key`)"); } } + /// We disable prefer_localhost_replica because if one of the replicas is local it will create a single local plan + /// instead of executing the query with multiple replicas + /// We can enable this setting again for custom key parallel replicas when we can generate a plan that will use both a + /// local plan and remote replicas else if (auto * distributed = dynamic_cast(storage.get()); - distributed && context->canUseParallelReplicasCustomKey(*distributed->getCluster())) + distributed && context->canUseParallelReplicasCustomKeyForCluster(*distributed->getCluster())) { context->setSetting("distributed_group_by_no_merge", 2); + context->setSetting("prefer_localhost_replica", Field(0)); + } + else if ( + storage->isMergeTree() && (storage->supportsReplication() || settings.parallel_replicas_for_non_replicated_merge_tree) + && context->getClientInfo().distributed_depth == 0 + && context->canUseParallelReplicasCustomKeyForCluster(*context->getClusterForParallelReplicas())) + { + context->setSetting("prefer_localhost_replica", Field(0)); } } @@ -2603,10 +2618,10 @@ static Aggregator::Params getAggregatorParams( size_t group_by_two_level_threshold, size_t group_by_two_level_threshold_bytes) { - const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams( - query_ptr, + const auto stats_collecting_params = StatsCollectingParams( + calculateCacheKey(query_ptr), settings.collect_hash_table_stats_during_aggregation, - settings.max_entries_for_hash_table_stats, + context.getServerSettings().max_entries_for_hash_table_stats, settings.max_size_to_preallocate_for_aggregation); return Aggregator::Params diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index cc1d7dd6531..e953a07b41d 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterShowColumnsQuery.cpp b/src/Interpreters/InterpreterShowColumnsQuery.cpp index f32ebceaa63..d8fff4e6026 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.cpp +++ b/src/Interpreters/InterpreterShowColumnsQuery.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index d3526941b33..c284acfa308 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterTransactionControlQuery.cpp b/src/Interpreters/InterpreterTransactionControlQuery.cpp index 13872fbe3f5..e92d6f9c5e7 100644 --- a/src/Interpreters/InterpreterTransactionControlQuery.cpp +++ b/src/Interpreters/InterpreterTransactionControlQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Interpreters/InterpreterUndropQuery.cpp b/src/Interpreters/InterpreterUndropQuery.cpp index 920df3d6aed..8f935e951ef 100644 --- a/src/Interpreters/InterpreterUndropQuery.cpp +++ b/src/Interpreters/InterpreterUndropQuery.cpp @@ -64,7 +64,7 @@ BlockIO InterpreterUndropQuery::executeToTable(ASTUndropQuery & query) database->checkMetadataFilenameAvailability(table_id.table_name); - DatabaseCatalog::instance().dequeueDroppedTableCleanup(table_id); + DatabaseCatalog::instance().undropTable(table_id); return {}; } diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 457ed3ef4a6..0de2bf9cb1f 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -1,5 +1,6 @@ #include +#include #include #include diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 6d3a4f30b34..480c6736bc5 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +43,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Interpreters/MutationsNonDeterministicHelpers.cpp b/src/Interpreters/MutationsNonDeterministicHelpers.cpp index 7a4cb91acc0..bcff3e18b25 100644 --- a/src/Interpreters/MutationsNonDeterministicHelpers.cpp +++ b/src/Interpreters/MutationsNonDeterministicHelpers.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h index e8194f0dfe1..b2f55003da5 100644 --- a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h +++ b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h @@ -1,11 +1,10 @@ #pragma once -#include - +#include #include #include -#include +#include namespace DB { diff --git a/src/Interpreters/ObjectStorageQueueLog.cpp b/src/Interpreters/ObjectStorageQueueLog.cpp index 24261429434..c841984fd08 100644 --- a/src/Interpreters/ObjectStorageQueueLog.cpp +++ b/src/Interpreters/ObjectStorageQueueLog.cpp @@ -36,7 +36,6 @@ ColumnsDescription ObjectStorageQueueLogElement::getColumnsDescription() {"status", status_datatype, "Status of the processing file"}, {"processing_start_time", std::make_shared(std::make_shared()), "Time of the start of processing the file"}, {"processing_end_time", std::make_shared(std::make_shared()), "Time of the end of processing the file"}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected while loading this file"}, {"exception", std::make_shared(), "Exception message if happened"}, }; } @@ -64,8 +63,6 @@ void ObjectStorageQueueLogElement::appendToBlock(MutableColumns & columns) const else columns[i++]->insertDefault(); - ProfileEvents::dumpToMapColumn(counters_snapshot, columns[i++].get(), true); - columns[i++]->insert(exception); } diff --git a/src/Interpreters/ObjectStorageQueueLog.h b/src/Interpreters/ObjectStorageQueueLog.h index b0e843a0cc3..669238d8dbb 100644 --- a/src/Interpreters/ObjectStorageQueueLog.h +++ b/src/Interpreters/ObjectStorageQueueLog.h @@ -26,7 +26,6 @@ struct ObjectStorageQueueLogElement Failed, }; ObjectStorageQueueStatus status; - ProfileEvents::Counters::Snapshot counters_snapshot; time_t processing_start_time; time_t processing_end_time; std::string exception; diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 20451fb20ad..48c9988b6fc 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -73,66 +73,55 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v return false; } -void OptimizeIfWithConstantConditionVisitor::visit(ASTPtr & current_ast) +void OptimizeIfWithConstantConditionVisitorData::visit(ASTFunction & function_node, ASTPtr & ast) { - if (!current_ast) - return; - checkStackSize(); - for (ASTPtr & child : current_ast->children) + if (function_node.name != "if") + return; + + if (!function_node.arguments) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function 'if' (0 instead of 3)"); + + if (function_node.arguments->children.size() != 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Wrong number of arguments for function 'if' ({} instead of 3)", + function_node.arguments->children.size()); + + const auto * args = function_node.arguments->as(); + + ASTPtr condition_expr = args->children[0]; + ASTPtr then_expr = args->children[1]; + ASTPtr else_expr = args->children[2]; + + bool condition; + if (tryExtractConstValueFromCondition(condition_expr, condition)) { - auto * function_node = child->as(); - if (!function_node || function_node->name != "if") + ASTPtr replace_ast = condition ? then_expr : else_expr; + ASTPtr child_copy = ast; + String replace_alias = replace_ast->tryGetAlias(); + String if_alias = ast->tryGetAlias(); + + if (replace_alias.empty()) { - visit(child); - continue; + replace_ast->setAlias(if_alias); + ast = replace_ast; + } + else + { + /// Only copy of one node is required here. + /// But IAST has only method for deep copy of subtree. + /// This can be a reason of performance degradation in case of deep queries. + ASTPtr replace_ast_deep_copy = replace_ast->clone(); + replace_ast_deep_copy->setAlias(if_alias); + ast = replace_ast_deep_copy; } - if (!function_node->arguments) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function 'if' (0 instead of 3)"); - - if (function_node->arguments->children.size() != 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Wrong number of arguments for function 'if' ({} instead of 3)", - function_node->arguments->children.size()); - - visit(function_node->arguments); - const auto * args = function_node->arguments->as(); - - ASTPtr condition_expr = args->children[0]; - ASTPtr then_expr = args->children[1]; - ASTPtr else_expr = args->children[2]; - - bool condition; - if (tryExtractConstValueFromCondition(condition_expr, condition)) + if (!if_alias.empty()) { - ASTPtr replace_ast = condition ? then_expr : else_expr; - ASTPtr child_copy = child; - String replace_alias = replace_ast->tryGetAlias(); - String if_alias = child->tryGetAlias(); - - if (replace_alias.empty()) - { - replace_ast->setAlias(if_alias); - child = replace_ast; - } - else - { - /// Only copy of one node is required here. - /// But IAST has only method for deep copy of subtree. - /// This can be a reason of performance degradation in case of deep queries. - ASTPtr replace_ast_deep_copy = replace_ast->clone(); - replace_ast_deep_copy->setAlias(if_alias); - child = replace_ast_deep_copy; - } - - if (!if_alias.empty()) - { - auto alias_it = aliases.find(if_alias); - if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get()) - alias_it->second = child; - } + auto alias_it = aliases.find(if_alias); + if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get()) + alias_it->second = ast; } } } diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h index ad98f92bafd..3b46f90f07c 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h @@ -1,23 +1,24 @@ #pragma once #include +#include namespace DB { - -/// It removes Function_if node from AST if condition is constant. -/// TODO: rewrite with InDepthNodeVisitor -class OptimizeIfWithConstantConditionVisitor +struct OptimizeIfWithConstantConditionVisitorData { -public: - explicit OptimizeIfWithConstantConditionVisitor(Aliases & aliases_) + using TypeToVisit = ASTFunction; + + explicit OptimizeIfWithConstantConditionVisitorData(Aliases & aliases_) : aliases(aliases_) {} - void visit(ASTPtr & ast); - + void visit(ASTFunction & function_node, ASTPtr & ast); private: Aliases & aliases; }; +/// It removes Function_if node from AST if condition is constant. +using OptimizeIfWithConstantConditionVisitor = InDepthNodeVisitor, false>; + } diff --git a/src/Interpreters/PredicateExpressionsOptimizer.cpp b/src/Interpreters/PredicateExpressionsOptimizer.cpp index 8dc8c1c92cc..af67e62be1a 100644 --- a/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index b04c8b1b725..43a04f8aff0 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/ProcessorsProfileLog.h b/src/Interpreters/ProcessorsProfileLog.h index 8319d373f39..abece2604f2 100644 --- a/src/Interpreters/ProcessorsProfileLog.h +++ b/src/Interpreters/ProcessorsProfileLog.h @@ -25,11 +25,11 @@ struct ProcessorProfileLogElement String processor_name; /// Milliseconds spend in IProcessor::work() - UInt32 elapsed_us{}; + UInt64 elapsed_us{}; /// IProcessor::NeedData - UInt32 input_wait_elapsed_us{}; + UInt64 input_wait_elapsed_us{}; /// IProcessor::PortFull - UInt32 output_wait_elapsed_us{}; + UInt64 output_wait_elapsed_us{}; size_t input_rows{}; size_t input_bytes{}; diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index e63a2ae31aa..527159dc981 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index bbaa7179757..d9cce4b9dd7 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include @@ -22,6 +22,8 @@ namespace ProfileEvents namespace DB { +struct Settings; + /** Allows to log information about queries execution: * - info about start of query execution; diff --git a/src/Interpreters/QueryViewsLog.h b/src/Interpreters/QueryViewsLog.h index 2de06fe3ddc..8f382ba6183 100644 --- a/src/Interpreters/QueryViewsLog.h +++ b/src/Interpreters/QueryViewsLog.h @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/RewriteCountVariantsVisitor.cpp b/src/Interpreters/RewriteCountVariantsVisitor.cpp index f207bc51527..4a541c3765a 100644 --- a/src/Interpreters/RewriteCountVariantsVisitor.cpp +++ b/src/Interpreters/RewriteCountVariantsVisitor.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp deleted file mode 100644 index f0202199752..00000000000 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ /dev/null @@ -1,157 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace -{ - -ASTPtr transformToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - return std::make_shared(Nested::concatenateName(name_in_storage, subcolumn_name)); -} - -ASTPtr transformEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("equals", ast, std::make_shared(0u)); -} - -ASTPtr transformNotEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("notEquals", ast, std::make_shared(0u)); -} - -ASTPtr transformIsNotNullToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("not", ast); -} - -ASTPtr transformCountNullableToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("sum", makeASTFunction("not", ast)); -} - -ASTPtr transformMapContainsToSubcolumn(const String & name_in_storage, const String & subcolumn_name, const ASTPtr & arg) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("has", ast, arg); -} - -const std::unordered_map> unary_function_to_subcolumn = -{ - {"length", {TypeIndex::Array, "size0", transformToSubcolumn}}, - {"empty", {TypeIndex::Array, "size0", transformEmptyToSubcolumn}}, - {"notEmpty", {TypeIndex::Array, "size0", transformNotEmptyToSubcolumn}}, - {"isNull", {TypeIndex::Nullable, "null", transformToSubcolumn}}, - {"isNotNull", {TypeIndex::Nullable, "null", transformIsNotNullToSubcolumn}}, - {"count", {TypeIndex::Nullable, "null", transformCountNullableToSubcolumn}}, - {"mapKeys", {TypeIndex::Map, "keys", transformToSubcolumn}}, - {"mapValues", {TypeIndex::Map, "values", transformToSubcolumn}}, -}; - -const std::unordered_map> binary_function_to_subcolumn -{ - {"mapContains", {TypeIndex::Map, "keys", transformMapContainsToSubcolumn}}, -}; - -} - -void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) const -{ - const auto & arguments = function.arguments->children; - if (arguments.empty() || arguments.size() > 2) - return; - - const auto * identifier = arguments[0]->as(); - if (!identifier) - return; - - const auto & columns = metadata_snapshot->getColumns(); - const auto & name_in_storage = identifier->name(); - - if (!columns.has(name_in_storage)) - return; - - const auto & column_type = columns.get(name_in_storage).type; - TypeIndex column_type_id = column_type->getTypeId(); - const auto & alias = function.tryGetAlias(); - - if (arguments.size() == 1) - { - auto it = unary_function_to_subcolumn.find(function.name); - if (it != unary_function_to_subcolumn.end()) - { - const auto & [type_id, subcolumn_name, transformer] = it->second; - if (column_type_id == type_id) - { - ast = transformer(name_in_storage, subcolumn_name); - ast->setAlias(alias); - } - } - } - else - { - if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) - { - const auto * literal = arguments[1]->as(); - if (!literal) - return; - - String subcolumn_name; - auto value_type = literal->value.getType(); - if (value_type == Field::Types::UInt64) - { - const auto & type_tuple = assert_cast(*column_type); - auto index = literal->value.get(); - subcolumn_name = type_tuple.getNameByPosition(index); - } - else if (value_type == Field::Types::String) - subcolumn_name = literal->value.get(); - else - return; - - ast = transformToSubcolumn(name_in_storage, subcolumn_name); - ast->setAlias(alias); - } - else if (function.name == "variantElement" && column_type_id == TypeIndex::Variant) - { - const auto * literal = arguments[1]->as(); - if (!literal) - return; - - String subcolumn_name; - auto value_type = literal->value.getType(); - if (value_type != Field::Types::String) - return; - - subcolumn_name = literal->value.get(); - ast = transformToSubcolumn(name_in_storage, subcolumn_name); - ast->setAlias(alias); - } - else - { - auto it = binary_function_to_subcolumn.find(function.name); - if (it != binary_function_to_subcolumn.end()) - { - const auto & [type_id, subcolumn_name, transformer] = it->second; - if (column_type_id == type_id) - { - ast = transformer(name_in_storage, subcolumn_name, arguments[1]); - ast->setAlias(alias); - } - } - } - } -} - -} diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h deleted file mode 100644 index 4d064bdee10..00000000000 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class ASTFunction; - -/// Rewrites functions to subcolumns, if possible, to reduce amount of read data. -/// E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' -class RewriteFunctionToSubcolumnData -{ -public: - using TypeToVisit = ASTFunction; - void visit(ASTFunction & function, ASTPtr & ast) const; - - StorageMetadataPtr metadata_snapshot; -}; - -using RewriteFunctionToSubcolumnMatcher = OneTypeMatcher; -using RewriteFunctionToSubcolumnVisitor = InDepthNodeVisitor; - -} diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index cdca3f5c049..b65a6bebdf1 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -9,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -130,7 +132,7 @@ public: LOG_TRACE(log, "Reuse session from storage with session_id: {}, user_id: {}", key.second, key.first); - if (!session.unique()) + if (!isSharedPtrUnique(session)) throw Exception(ErrorCodes::SESSION_IS_LOCKED, "Session {} is locked by a concurrent client", session_id); return {session, false}; } @@ -156,7 +158,7 @@ public: return; } - if (!it->second.unique()) + if (!isSharedPtrUnique(it->second)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot close session {} with refcount {}", session_id, it->second.use_count()); sessions.erase(it); diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 557065b23ff..7d84efba1b5 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -356,10 +357,15 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf if (blob_storage_log) logs.emplace_back(blob_storage_log.get()); + bool should_prepare = global_context->getServerSettings().prepare_system_log_tables_on_startup; try { for (auto & log : logs) + { log->startup(); + if (should_prepare) + log->prepareTable(); + } } catch (...) { diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 94cb8c3e2fd..0ac468b15ec 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -135,6 +135,12 @@ public: void stopFlushThread() override; + /** Creates new table if it does not exist. + * Renames old table if its structure is not suitable. + * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. + */ + void prepareTable() override; + protected: LoggerPtr log; @@ -145,12 +151,6 @@ protected: StoragePtr getStorage() const; - /** Creates new table if it does not exist. - * Renames old table if its structure is not suitable. - * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. - */ - void prepareTable() override; - /// Some tables can override settings for internal queries virtual void addSettingsForQuery(ContextMutablePtr & mutable_context, IAST::QueryKind query_kind) const; diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 8e83233e54c..ed14fe4273c 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -2,7 +2,6 @@ #include #include -#include #include #include #include diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 9ca521a4ab3..1f7c6b1fe68 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -233,7 +234,8 @@ void ThreadStatus::attachToGroupImpl(const ThreadGroupPtr & thread_group_) { /// Attach or init current thread to thread group and copy useful information from it thread_group = thread_group_; - thread_group->linkThread(thread_id); + if (!internal_thread) + thread_group->linkThread(thread_id); performance_counters.setParent(&thread_group->performance_counters); memory_tracker.setParent(&thread_group->memory_tracker); @@ -269,7 +271,8 @@ void ThreadStatus::detachFromGroup() /// Extract MemoryTracker out from query and user context memory_tracker.setParent(&total_memory_tracker); - thread_group->unlinkThread(); + if (!internal_thread) + thread_group->unlinkThread(); thread_group.reset(); diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index c331c8640d6..b872eb94fde 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -564,12 +563,6 @@ void transformIfStringsIntoEnum(ASTPtr & query) ConvertStringsToEnumVisitor(convert_data).visit(query); } -void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & metadata_snapshot) -{ - RewriteFunctionToSubcolumnVisitor::Data data{metadata_snapshot}; - RewriteFunctionToSubcolumnVisitor(data).visit(query); -} - void optimizeOrLikeChain(ASTPtr & query) { ConvertFunctionOrLikeVisitor::Data data = {}; @@ -584,7 +577,8 @@ void TreeOptimizer::optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_ optimizeMultiIfToIf(query); /// Optimize if with constant condition after constants was substituted instead of scalar subqueries. - OptimizeIfWithConstantConditionVisitor(aliases).visit(query); + OptimizeIfWithConstantConditionVisitorData visitor_data(aliases); + OptimizeIfWithConstantConditionVisitor(visitor_data).visit(query); if (if_chain_to_multiif) OptimizeIfChainsVisitor().visit(query); @@ -634,9 +628,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, if (!select_query) throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts."); - if (settings.optimize_functions_to_subcolumns && result.storage_snapshot && result.storage->supportsSubcolumns()) - optimizeFunctionsToSubcolumns(query, result.storage_snapshot->metadata); - /// Move arithmetic operations out of aggregation functions if (settings.optimize_arithmetic_operations_in_aggregate_functions) optimizeAggregationFunctions(query); diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index 4e1a2bcf5ee..ac80ad2fb68 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,7 @@ #include #include #include + #include diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index e372f036073..5d237d28089 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -237,6 +238,7 @@ private: Int64 timeout_seconds = 120; bool is_replicated_database = false; bool throw_on_timeout = true; + bool throw_on_timeout_only_active = false; bool only_running_hosts = false; bool timeout_exceeded = false; @@ -316,8 +318,8 @@ DDLQueryStatusSource::DDLQueryStatusSource( , log(getLogger("DDLQueryStatusSource")) { auto output_mode = context->getSettingsRef().distributed_ddl_output_mode; - throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE - || output_mode == DistributedDDLOutputMode::NONE || output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE; + throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE; + throw_on_timeout_only_active = output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE || output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE; if (hosts_to_wait) { @@ -451,7 +453,7 @@ Chunk DDLQueryStatusSource::generate() "({} of them are currently executing the task, {} are inactive). " "They are going to execute the query in background. Was waiting for {} seconds{}"; - if (throw_on_timeout) + if (throw_on_timeout || (throw_on_timeout_only_active && !stop_waiting_offline_hosts)) { if (!first_exception) first_exception = std::make_unique(Exception(ErrorCodes::TIMEOUT_EXCEEDED, diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 9f33cbf1c27..d9d3ba58160 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -475,10 +475,9 @@ void logQueryFinish( processor_elem.processor_name = processor->getName(); - /// NOTE: convert this to UInt64 - processor_elem.elapsed_us = static_cast(processor->getElapsedUs()); - processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedUs()); - processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedUs()); + processor_elem.elapsed_us = static_cast(processor->getElapsedNs() / 1000U); + processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedNs() / 1000U); + processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedNs() / 1000U); auto stats = processor->getProcessorDataStats(); processor_elem.input_rows = stats.input_rows; diff --git a/src/Interpreters/formatWithPossiblyHidingSecrets.h b/src/Interpreters/formatWithPossiblyHidingSecrets.h index 25e1e7a5616..039bcbc2bca 100644 --- a/src/Interpreters/formatWithPossiblyHidingSecrets.h +++ b/src/Interpreters/formatWithPossiblyHidingSecrets.h @@ -2,6 +2,8 @@ #include "Access/ContextAccess.h" #include "Interpreters/Context.h" +#include + namespace DB { struct SecretHidingFormatSettings diff --git a/src/Interpreters/getColumnFromBlock.cpp b/src/Interpreters/getColumnFromBlock.cpp index 972e109afb3..2e70a58b5a1 100644 --- a/src/Interpreters/getColumnFromBlock.cpp +++ b/src/Interpreters/getColumnFromBlock.cpp @@ -31,6 +31,36 @@ ColumnPtr tryGetColumnFromBlock(const Block & block, const NameAndTypePair & req return castColumn({elem_column, elem_type, ""}, requested_column.type); } +ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requested_column_type, const NameAndTypePair & requested_subcolumn) +{ + const auto * elem = block.findByName(requested_subcolumn.getNameInStorage()); + if (!elem) + return nullptr; + + auto subcolumn_name = requested_subcolumn.getSubcolumnName(); + /// If requested subcolumn is dynamic, we should first perform cast and then + /// extract the subcolumn, because the data of dynamic subcolumn can change after cast. + if (elem->type->hasDynamicSubcolumns() && !elem->type->equals(*requested_column_type)) + { + auto casted_column = castColumn({elem->column, elem->type, ""}, requested_column_type); + auto elem_column = requested_column_type->tryGetSubcolumn(subcolumn_name, casted_column); + auto elem_type = requested_column_type->tryGetSubcolumnType(subcolumn_name); + + if (!elem_type || !elem_column) + return nullptr; + + return elem_column; + } + + auto elem_column = elem->type->tryGetSubcolumn(subcolumn_name, elem->column); + auto elem_type = elem->type->tryGetSubcolumnType(subcolumn_name); + + if (!elem_type || !elem_column) + return nullptr; + + return castColumn({elem_column, elem_type, ""}, requested_subcolumn.type); +} + ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & requested_column) { auto result_column = tryGetColumnFromBlock(block, requested_column); diff --git a/src/Interpreters/getColumnFromBlock.h b/src/Interpreters/getColumnFromBlock.h index 26500cfdd17..737ce9db555 100644 --- a/src/Interpreters/getColumnFromBlock.h +++ b/src/Interpreters/getColumnFromBlock.h @@ -9,5 +9,6 @@ namespace DB ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & requested_column); ColumnPtr tryGetColumnFromBlock(const Block & block, const NameAndTypePair & requested_column); +ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requested_column_type, const NameAndTypePair & requested_subcolumn); } diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp index 43be7c5f043..6f902c7cd7e 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include #include diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index 06c5d424d2f..cf18cbbb54a 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/interpretSubquery.cpp b/src/Interpreters/interpretSubquery.cpp index 6b42345f1d6..340f6d1d805 100644 --- a/src/Interpreters/interpretSubquery.cpp +++ b/src/Interpreters/interpretSubquery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 9c3922b8bda..b5148174043 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index 3529863a623..b9fdaabede1 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -21,6 +22,18 @@ extern const int ILLEGAL_COLUMN; } +DataTypeValidationSettings::DataTypeValidationSettings(const DB::Settings& settings) + : allow_suspicious_low_cardinality_types(settings.allow_suspicious_low_cardinality_types) + , allow_experimental_object_type(settings.allow_experimental_object_type) + , allow_suspicious_fixed_string_types(settings.allow_suspicious_fixed_string_types) + , allow_experimental_variant_type(settings.allow_experimental_variant_type) + , allow_suspicious_variant_types(settings.allow_suspicious_variant_types) + , validate_nested_types(settings.validate_experimental_and_suspicious_types_inside_nested_types) + , allow_experimental_dynamic_type(settings.allow_experimental_dynamic_type) +{ +} + + void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidationSettings & settings) { auto validate_callback = [&](const IDataType & data_type) diff --git a/src/Interpreters/parseColumnsListForTableFunction.h b/src/Interpreters/parseColumnsListForTableFunction.h index e2d2bc97ff7..6f15c585e4f 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.h +++ b/src/Interpreters/parseColumnsListForTableFunction.h @@ -2,28 +2,19 @@ #include #include -#include namespace DB { class Context; +struct Settings; struct DataTypeValidationSettings { DataTypeValidationSettings() = default; - explicit DataTypeValidationSettings(const Settings & settings) - : allow_suspicious_low_cardinality_types(settings.allow_suspicious_low_cardinality_types) - , allow_experimental_object_type(settings.allow_experimental_object_type) - , allow_suspicious_fixed_string_types(settings.allow_suspicious_fixed_string_types) - , allow_experimental_variant_type(settings.allow_experimental_variant_type) - , allow_suspicious_variant_types(settings.allow_suspicious_variant_types) - , validate_nested_types(settings.validate_experimental_and_suspicious_types_inside_nested_types) - , allow_experimental_dynamic_type(settings.allow_experimental_dynamic_type) - { - } + explicit DataTypeValidationSettings(const Settings & settings); bool allow_suspicious_low_cardinality_types = true; bool allow_experimental_object_type = true; diff --git a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp index 44167fe7242..a6595d330fc 100644 --- a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp +++ b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,10 @@ #include #include #include +#include +#include +#include +#include namespace DB @@ -38,6 +43,13 @@ static bool isAccessControlQuery(const ASTPtr & query) || query->as(); } +static bool isNamedCollectionQuery(const ASTPtr & query) +{ + return query->as() + || query->as() + || query->as(); +} + ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, const WithoutOnClusterASTRewriteParams & params) { auto * query_on_cluster = dynamic_cast(query.get()); @@ -46,11 +58,14 @@ ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, c return query; if ((isUserDefinedFunctionQuery(query) - && context->getSettings().ignore_on_cluster_for_replicated_udf_queries + && context->getSettingsRef().ignore_on_cluster_for_replicated_udf_queries && context->getUserDefinedSQLObjectsStorage().isReplicated()) || (isAccessControlQuery(query) - && context->getSettings().ignore_on_cluster_for_replicated_access_entities_queries - && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE))) + && context->getSettingsRef().ignore_on_cluster_for_replicated_access_entities_queries + && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE)) + || (isNamedCollectionQuery(query) + && context->getSettingsRef().ignore_on_cluster_for_replicated_named_collections_queries + && NamedCollectionFactory::instance().usesReplicatedStorage())) { LOG_DEBUG(getLogger("removeOnClusterClauseIfNeeded"), "ON CLUSTER clause was ignored for query {}", query->getID()); return query_on_cluster->getRewrittenASTWithoutOnCluster(params); diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h index 7ca27cf6584..88bb6b9ce76 100644 --- a/src/Loggers/OwnSplitChannel.h +++ b/src/Loggers/OwnSplitChannel.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 602ef8c232b..f39229d7566 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -408,25 +408,26 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format { const char * operators[] = { - "multiply", " * ", - "divide", " / ", - "modulo", " % ", - "plus", " + ", - "minus", " - ", - "notEquals", " != ", - "lessOrEquals", " <= ", - "greaterOrEquals", " >= ", - "less", " < ", - "greater", " > ", - "equals", " = ", - "like", " LIKE ", - "ilike", " ILIKE ", - "notLike", " NOT LIKE ", - "notILike", " NOT ILIKE ", - "in", " IN ", - "notIn", " NOT IN ", - "globalIn", " GLOBAL IN ", - "globalNotIn", " GLOBAL NOT IN ", + "multiply", " * ", + "divide", " / ", + "modulo", " % ", + "plus", " + ", + "minus", " - ", + "notEquals", " != ", + "lessOrEquals", " <= ", + "greaterOrEquals", " >= ", + "less", " < ", + "greater", " > ", + "equals", " = ", + "isNotDistinctFrom", " <=> ", + "like", " LIKE ", + "ilike", " ILIKE ", + "notLike", " NOT LIKE ", + "notILike", " NOT ILIKE ", + "in", " IN ", + "notIn", " NOT IN ", + "globalIn", " GLOBAL IN ", + "globalNotIn", " GLOBAL NOT IN ", nullptr }; diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index e782bad797e..d22a4eca0fc 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -243,7 +243,7 @@ void ASTTableJoin::formatImplAfterTable(const FormatSettings & settings, FormatS void ASTTableJoin::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { formatImplBeforeTable(settings, state, frame); - settings.ostr << " ... "; + settings.ostr << " ..."; formatImplAfterTable(settings, state, frame); } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 5997452bcf3..d4fc9a4bc4d 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1129,11 +1129,11 @@ inline static bool makeHexOrBinStringLiteral(IParser::Pos & pos, ASTPtr & node, if (hex) { - hexStringDecode(str_begin, str_end, res_pos); + hexStringDecode(str_begin, str_end, res_pos, word_size); } else { - binStringDecode(str_begin, str_end, res_pos); + binStringDecode(str_begin, str_end, res_pos, word_size); } return makeStringLiteral(pos, node, String(reinterpret_cast(res.data()), (res_pos - res_begin - 1))); diff --git a/src/Parsers/isUnquotedIdentifier.cpp b/src/Parsers/isUnquotedIdentifier.cpp new file mode 100644 index 00000000000..6f2442635ec --- /dev/null +++ b/src/Parsers/isUnquotedIdentifier.cpp @@ -0,0 +1,20 @@ +#include + +#include + +namespace DB +{ + +bool isUnquotedIdentifier(const String & name) +{ + Lexer lexer(name.data(), name.data() + name.size()); + + auto maybe_ident = lexer.nextToken(); + + if (maybe_ident.type != TokenType::BareWord) + return false; + + return lexer.nextToken().isEnd(); +} + +} diff --git a/src/Parsers/isUnquotedIdentifier.h b/src/Parsers/isUnquotedIdentifier.h new file mode 100644 index 00000000000..839e5860ad3 --- /dev/null +++ b/src/Parsers/isUnquotedIdentifier.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ + +bool isUnquotedIdentifier(const String & name); + +} diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index 52a0d748d63..9cac4e61380 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 2d42ed73223..5640d84731f 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include @@ -39,6 +41,7 @@ #include #include +#include #include #include @@ -370,10 +373,10 @@ Aggregator::Params getAggregatorParams(const PlannerContextPtr & planner_context const auto & query_context = planner_context->getQueryContext(); const Settings & settings = query_context->getSettingsRef(); - const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams( - select_query_info.query, + const auto stats_collecting_params = StatsCollectingParams( + calculateCacheKey(select_query_info.query), settings.collect_hash_table_stats_during_aggregation, - settings.max_entries_for_hash_table_stats, + query_context->getServerSettings().max_entries_for_hash_table_stats, settings.max_size_to_preallocate_for_aggregation); auto aggregate_descriptions = aggregation_analysis_result.aggregate_descriptions; diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 7a12d5d690d..a0c7f9b2516 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -30,6 +30,8 @@ #include #include +#include + namespace DB { diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index ceb506d1bbb..68e0ef9785e 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace DB { diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index d26092d57cb..ab599331a9d 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include @@ -647,7 +649,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto table_expression_query_info = select_query_info; table_expression_query_info.table_expression = table_expression; table_expression_query_info.filter_actions_dag = table_expression_data.getFilterActions(); - table_expression_query_info.analyzer_can_use_parallel_replicas_on_follower = table_node == planner_context->getGlobalPlannerContext()->parallel_replicas_table; + table_expression_query_info.current_table_chosen_for_reading_with_parallel_replicas + = table_node == planner_context->getGlobalPlannerContext()->parallel_replicas_table; size_t max_streams = settings.max_threads; size_t max_threads_execute_query = settings.max_threads; @@ -834,7 +837,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (row_policy_filter_info.actions) table_expression_data.setRowLevelFilterActions(row_policy_filter_info.actions); - if (query_context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY) + if (query_context->canUseParallelReplicasCustomKey()) { if (settings.parallel_replicas_count > 1) { @@ -843,9 +846,14 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres add_filter(parallel_replicas_custom_key_filter_info, "Parallel replicas custom key filter"); } else if (auto * distributed = typeid_cast(storage.get()); - distributed && query_context->canUseParallelReplicasCustomKey(*distributed->getCluster())) + distributed && query_context->canUseParallelReplicasCustomKeyForCluster(*distributed->getCluster())) { planner_context->getMutableQueryContext()->setSetting("distributed_group_by_no_merge", 2); + /// We disable prefer_localhost_replica because if one of the replicas is local it will create a single local plan + /// instead of executing the query with multiple replicas + /// We can enable this setting again for custom key parallel replicas when we can generate a plan that will use both a + /// local plan and remote replicas + planner_context->getMutableQueryContext()->setSetting("prefer_localhost_replica", Field{0}); } } @@ -857,6 +865,15 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres from_stage = storage->getQueryProcessingStage( query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); + /// It is just a safety check needed until we have a proper sending plan to replicas. + /// If we have a non-trivial storage like View it might create its own Planner inside read(), run findTableForParallelReplicas() + /// and find some other table that might be used for reading with parallel replicas. It will lead to errors. + const bool other_table_already_chosen_for_reading_with_parallel_replicas + = planner_context->getGlobalPlannerContext()->parallel_replicas_table + && !table_expression_query_info.current_table_chosen_for_reading_with_parallel_replicas; + if (other_table_already_chosen_for_reading_with_parallel_replicas) + planner_context->getMutableQueryContext()->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + storage->read( query_plan, columns_names, @@ -879,7 +896,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres }; /// query_plan can be empty if there is nothing to read - if (query_plan.isInitialized() && parallel_replicas_enabled_for_storage(storage, settings) && query_context->canUseParallelReplicasOnInitiator()) + if (query_plan.isInitialized() && parallel_replicas_enabled_for_storage(storage, settings)) { // (1) find read step QueryPlan::Node * node = query_plan.getRootNode(); @@ -906,54 +923,78 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } chassert(reading); - - // (2) if it's ReadFromMergeTree - run index analysis and check number of rows to read - if (settings.parallel_replicas_min_number_of_rows_per_replica > 0) + if (query_context->canUseParallelReplicasCustomKey() && query_context->getClientInfo().distributed_depth == 0) { - auto result_ptr = reading->selectRangesToRead(); - - UInt64 rows_to_read = result_ptr->selected_rows; - if (table_expression_query_info.trivial_limit > 0 && table_expression_query_info.trivial_limit < rows_to_read) - rows_to_read = table_expression_query_info.trivial_limit; - - if (max_block_size_limited && (max_block_size_limited < rows_to_read)) - rows_to_read = max_block_size_limited; - - const size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica; - LOG_TRACE( - getLogger("Planner"), - "Estimated {} rows to read. It is enough work for {} parallel replicas", - rows_to_read, - number_of_replicas_to_use); - - if (number_of_replicas_to_use <= 1) + if (auto cluster = query_context->getClusterForParallelReplicas(); + query_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) { - planner_context->getMutableQueryContext()->setSetting( - "allow_experimental_parallel_reading_from_replicas", Field(0)); - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); - LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); - } - else if (number_of_replicas_to_use < settings.max_parallel_replicas) - { - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use); - LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use); + planner_context->getMutableQueryContext()->setSetting("prefer_localhost_replica", Field{0}); + auto modified_query_info = select_query_info; + modified_query_info.cluster = std::move(cluster); + from_stage = QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; + QueryPlan query_plan_parallel_replicas; + ClusterProxy::executeQueryWithParallelReplicasCustomKey( + query_plan_parallel_replicas, + storage->getStorageID(), + modified_query_info, + storage->getInMemoryMetadataPtr()->getColumns(), + storage_snapshot, + from_stage, + table_expression_query_info.query_tree, + query_context); + query_plan = std::move(query_plan_parallel_replicas); } } - - // (3) if parallel replicas still enabled - replace reading step - if (planner_context->getQueryContext()->canUseParallelReplicasOnInitiator()) + else if (query_context->canUseParallelReplicasOnInitiator()) { - from_stage = QueryProcessingStage::WithMergeableState; - QueryPlan query_plan_parallel_replicas; - ClusterProxy::executeQueryWithParallelReplicas( - query_plan_parallel_replicas, - storage->getStorageID(), - from_stage, - table_expression_query_info.query_tree, - table_expression_query_info.planner_context, - query_context, - table_expression_query_info.storage_limits); - query_plan = std::move(query_plan_parallel_replicas); + // (2) if it's ReadFromMergeTree - run index analysis and check number of rows to read + if (settings.parallel_replicas_min_number_of_rows_per_replica > 0) + { + auto result_ptr = reading->selectRangesToRead(); + + UInt64 rows_to_read = result_ptr->selected_rows; + if (table_expression_query_info.trivial_limit > 0 && table_expression_query_info.trivial_limit < rows_to_read) + rows_to_read = table_expression_query_info.trivial_limit; + + if (max_block_size_limited && (max_block_size_limited < rows_to_read)) + rows_to_read = max_block_size_limited; + + const size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica; + LOG_TRACE( + getLogger("Planner"), + "Estimated {} rows to read. It is enough work for {} parallel replicas", + rows_to_read, + number_of_replicas_to_use); + + if (number_of_replicas_to_use <= 1) + { + planner_context->getMutableQueryContext()->setSetting( + "allow_experimental_parallel_reading_from_replicas", Field(0)); + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); + LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); + } + else if (number_of_replicas_to_use < settings.max_parallel_replicas) + { + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use); + LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use); + } + } + + // (3) if parallel replicas still enabled - replace reading step + if (planner_context->getQueryContext()->canUseParallelReplicasOnInitiator()) + { + from_stage = QueryProcessingStage::WithMergeableState; + QueryPlan query_plan_parallel_replicas; + ClusterProxy::executeQueryWithParallelReplicas( + query_plan_parallel_replicas, + storage->getStorageID(), + from_stage, + table_expression_query_info.query_tree, + table_expression_query_info.planner_context, + query_context, + table_expression_query_info.storage_limits); + query_plan = std::move(query_plan_parallel_replicas); + } } } diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index e752c57b08b..d9360a58240 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -43,6 +43,9 @@ #include #include +#include +#include + namespace DB { @@ -528,7 +531,7 @@ JoinClausesAndActions buildJoinClausesAndActions( size_t join_clause_key_nodes_size = join_clause.getLeftKeyNodes().size(); if (join_clause_key_nodes_size == 0) - throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "JOIN {} cannot get JOIN keys", + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Cannot determine join keys in {}", join_node.formatASTForErrorMessage()); for (size_t i = 0; i < join_clause_key_nodes_size; ++i) @@ -768,10 +771,8 @@ std::shared_ptr tryDirectJoin(const std::shared_ptr(table_join, right_table_expression_header, storage, right_table_expression_header_with_storage_column_names); } - } - static std::shared_ptr tryCreateJoin(JoinAlgorithm algorithm, std::shared_ptr & table_join, const QueryTreeNodePtr & right_table_expression, @@ -803,9 +804,17 @@ static std::shared_ptr tryCreateJoin(JoinAlgorithm algorithm, algorithm == JoinAlgorithm::DEFAULT) { auto query_context = planner_context->getQueryContext(); - if (table_join->allowParallelHashJoin()) - return std::make_shared(query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header); + { + const auto & settings = query_context->getSettingsRef(); + StatsCollectingParams params{ + calculateCacheKey(table_join, right_table_expression), + settings.collect_hash_table_stats_during_joins, + query_context->getServerSettings().max_entries_for_hash_table_stats, + settings.max_size_to_preallocate_for_joins}; + return std::make_shared( + query_context, table_join, query_context->getSettingsRef().max_threads, right_table_expression_header, params); + } return std::make_shared(table_join, right_table_expression_header, query_context->getSettingsRef().join_any_take_last_row); } diff --git a/src/Planner/PlannerSorting.cpp b/src/Planner/PlannerSorting.cpp index 611a26f78fa..1c6e820fed1 100644 --- a/src/Planner/PlannerSorting.cpp +++ b/src/Planner/PlannerSorting.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include diff --git a/src/Planner/PlannerWindowFunctions.cpp b/src/Planner/PlannerWindowFunctions.cpp index ce74d82c08d..225852de5a7 100644 --- a/src/Planner/PlannerWindowFunctions.cpp +++ b/src/Planner/PlannerWindowFunctions.cpp @@ -4,6 +4,8 @@ #include #include +#include + #include #include diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 18a6d297838..3c54c57a28c 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -33,6 +33,8 @@ #include #include +#include + #include #include #include diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index f2bc1f060d8..c89a70be541 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 2631f665f9c..5f6cf2f7230 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -125,7 +125,7 @@ void Chunk::addColumn(size_t position, ColumnPtr column) if (position >= columns.size()) throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Chunk::addColumn(), max position = {}", - position, columns.size() - 1); + position, !columns.empty() ? columns.size() - 1 : 0); if (empty()) num_rows = column->size(); else if (column->size() != num_rows) @@ -143,7 +143,7 @@ void Chunk::erase(size_t position) if (position >= columns.size()) throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Chunk::erase(), max position = {}", - toString(position), toString(columns.size() - 1)); + toString(position), toString(!columns.empty() ? columns.size() - 1 : 0)); columns.erase(columns.begin() + position); } diff --git a/src/Processors/Executors/ExecutingGraph.cpp b/src/Processors/Executors/ExecutingGraph.cpp index 27f6a454b24..6d5b60d8159 100644 --- a/src/Processors/Executors/ExecutingGraph.cpp +++ b/src/Processors/Executors/ExecutingGraph.cpp @@ -292,7 +292,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue } else if (last_status == IProcessor::Status::NeedData && status != IProcessor::Status::NeedData) { - processor.input_wait_elapsed_us += processor.input_wait_watch.elapsedMicroseconds(); + processor.input_wait_elapsed_ns += processor.input_wait_watch.elapsedNanoseconds(); } /// PortFull @@ -302,7 +302,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue } else if (last_status == IProcessor::Status::PortFull && status != IProcessor::Status::PortFull) { - processor.output_wait_elapsed_us += processor.output_wait_watch.elapsedMicroseconds(); + processor.output_wait_elapsed_ns += processor.output_wait_watch.elapsedNanoseconds(); } } } diff --git a/src/Processors/Executors/ExecutionThreadContext.cpp b/src/Processors/Executors/ExecutionThreadContext.cpp index 05669725f9a..17b6773ad83 100644 --- a/src/Processors/Executors/ExecutionThreadContext.cpp +++ b/src/Processors/Executors/ExecutionThreadContext.cpp @@ -103,10 +103,10 @@ bool ExecutionThreadContext::executeTask() if (profile_processors) { - UInt64 elapsed_microseconds = execution_time_watch->elapsedMicroseconds(); - node->processor->elapsed_us += elapsed_microseconds; + UInt64 elapsed_ns = execution_time_watch->elapsedNanoseconds(); + node->processor->elapsed_ns += elapsed_ns; if (trace_processors) - span->addAttribute("execution_time_ms", elapsed_microseconds); + span->addAttribute("execution_time_ms", elapsed_ns / 1000U); } #ifndef NDEBUG execution_time_ns += execution_time_watch->elapsed(); diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 49ec9999521..82cad471a29 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef NDEBUG #include diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index ac5da172210..c5336f3bcc7 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -55,10 +56,25 @@ std::vector BinaryFormatReader::readNames() template std::vector BinaryFormatReader::readTypes() { - auto types = readHeaderRow(); - for (const auto & type_name : types) - read_data_types.push_back(DataTypeFactory::instance().get(type_name)); - return types; + read_data_types.reserve(read_columns); + Names type_names; + if (format_settings.binary.decode_types_in_binary_format) + { + type_names.reserve(read_columns); + for (size_t i = 0; i < read_columns; ++i) + { + read_data_types.push_back(decodeDataType(*in)); + type_names.push_back(read_data_types.back()->getName()); + } + } + else + { + type_names = readHeaderRow(); + for (const auto & type_name : type_names) + read_data_types.push_back(DataTypeFactory::instance().get(type_name)); + } + + return type_names; } template diff --git a/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp index ff904f61d22..d4c2348d080 100644 --- a/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -35,9 +36,15 @@ void BinaryRowOutputFormat::writePrefix() if (with_types) { - for (size_t i = 0; i < columns; ++i) + if (format_settings.binary.encode_types_in_binary_format) { - writeStringBinary(header.safeGetByPosition(i).type->getName(), out); + for (size_t i = 0; i < columns; ++i) + encodeDataType(header.safeGetByPosition(i).type, out); + } + else + { + for (size_t i = 0; i < columns; ++i) + writeStringBinary(header.safeGetByPosition(i).type->getName(), out); } } } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index dd7d6c6b024..b7f84748f61 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -616,7 +616,7 @@ void registerCSVSchemaReader(FormatFactory & factory) { String result = getAdditionalFormatInfoByEscapingRule(settings, FormatSettings::EscapingRule::CSV); if (!with_names) - result += fmt::format(", column_names_for_schema_inference={}, try_detect_header={}", settings.column_names_for_schema_inference, settings.csv.try_detect_header); + result += fmt::format(", column_names_for_schema_inference={}, try_detect_header={}, skip_first_lines={}", settings.column_names_for_schema_inference, settings.csv.try_detect_header, settings.csv.skip_first_lines); return result; }); } diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 9d056b42101..06e8668cd7c 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -227,7 +228,12 @@ private: return true; } - String column_name = "_dummy_" + std::to_string(replaced_literals.size()); + /// When generating placeholder names, ensure that we use names + /// requiring quotes to be valid identifiers. This prevents the + /// tuple() function from generating named tuples. Otherwise, + /// inserting named tuples with different names into another named + /// tuple will result in only default values being inserted. + String column_name = "-dummy-" + std::to_string(replaced_literals.size()); replaced_literals.emplace_back(literal, column_name, force_nullable); setDataType(replaced_literals.back()); ast = std::make_shared(column_name); diff --git a/src/Processors/Formats/Impl/FormRowInputFormat.cpp b/src/Processors/Formats/Impl/FormRowInputFormat.cpp index d3c6f3798cc..698efb896ea 100644 --- a/src/Processors/Formats/Impl/FormRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/FormRowInputFormat.cpp @@ -1,7 +1,9 @@ -#include +#include #include "Formats/EscapingRuleUtils.h" #include +#include + namespace DB { diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index a7a49ab6a8c..38fac60eef6 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -21,9 +21,7 @@ public: buf, header_, 0, - settings.skip_unknown_fields, - settings.null_as_default, - settings.native.allow_types_conversion, + settings, settings.defaults_for_omitted_fields ? &block_missing_values : nullptr)) , header(header_) {} @@ -72,9 +70,9 @@ private: class NativeOutputFormat final : public IOutputFormat { public: - NativeOutputFormat(WriteBuffer & buf, const Block & header, UInt64 client_protocol_version = 0) + NativeOutputFormat(WriteBuffer & buf, const Block & header, const FormatSettings & settings, UInt64 client_protocol_version = 0) : IOutputFormat(header, buf) - , writer(buf, client_protocol_version, header) + , writer(buf, client_protocol_version, header, settings) { } @@ -103,14 +101,17 @@ private: class NativeSchemaReader : public ISchemaReader { public: - explicit NativeSchemaReader(ReadBuffer & in_) : ISchemaReader(in_) {} + explicit NativeSchemaReader(ReadBuffer & in_, const FormatSettings & settings_) : ISchemaReader(in_), settings(settings_) {} NamesAndTypesList readSchema() override { - auto reader = NativeReader(in, 0); + auto reader = NativeReader(in, 0, settings); auto block = reader.read(); return block.getNamesAndTypesList(); } + +private: + const FormatSettings settings; }; @@ -134,16 +135,16 @@ void registerOutputFormatNative(FormatFactory & factory) const Block & sample, const FormatSettings & settings) { - return std::make_shared(buf, sample, settings.client_protocol_version); + return std::make_shared(buf, sample, settings, settings.client_protocol_version); }); } void registerNativeSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("Native", [](ReadBuffer & buf, const FormatSettings &) + factory.registerSchemaReader("Native", [](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf); + return std::make_shared(buf, settings); }); } diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index dcd5a531b05..c10969b02b7 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -900,6 +900,11 @@ bool NativeORCBlockInputFormat::prepareStripeReader() orc::RowReaderOptions row_reader_options; row_reader_options.includeTypes(include_indices); + if (format_settings.orc.read_use_writer_time_zone) + { + String writer_time_zone = current_stripe_info->getWriterTimezone(); + row_reader_options.setTimezoneName(writer_time_zone); + } row_reader_options.range(current_stripe_info->getOffset(), current_stripe_info->getLength()); if (format_settings.orc.filter_push_down && sarg) { diff --git a/src/Processors/Formats/Impl/NpyRowInputFormat.cpp b/src/Processors/Formats/Impl/NpyRowInputFormat.cpp index 65e0f9dd192..773cbc9268e 100644 --- a/src/Processors/Formats/Impl/NpyRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/NpyRowInputFormat.cpp @@ -445,6 +445,9 @@ bool NpyRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & /* elements_in_current_column *= header.shape[i]; } + if (typeid_cast(current_column)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected nesting level of column '{}', expected {}", column->getName(), header.shape.size() - 1); + for (size_t i = 0; i != elements_in_current_column; ++i) readValue(current_column); diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index 9a15789f267..18a0db7484e 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -113,7 +113,7 @@ private: { throw Exception( ErrorCodes::PARQUET_EXCEPTION, - "Unsupported logical type: {} and physical type: {} for field =={}=={}", + "Unsupported logical type: {} and physical type: {} for field `{}`{}", col_descriptor.logical_type()->ToString(), col_descriptor.physical_type(), col_descriptor.name(), msg); } }; diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h index 4c52300fbd1..698efecd4b2 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h @@ -74,7 +74,8 @@ void registerPrettyFormatWithNoEscapesAndMonoBlock(FormatFactory & factory, cons const Block & sample, const FormatSettings & format_settings) { - bool color = !no_escapes && format_settings.pretty.color.valueOr(format_settings.is_writing_to_terminal); + bool color = !no_escapes + && (format_settings.pretty.color == 1 || (format_settings.pretty.color == 2 && format_settings.is_writing_to_terminal)); return std::make_shared(buf, sample, format_settings, mono_block, color); }); if (!mono_block) diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 6d4dcba9e60..d2e17e92924 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -440,9 +440,10 @@ void registerTSVSchemaReader(FormatFactory & factory) settings, is_raw ? FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped); if (!with_names) result += fmt::format( - ", column_names_for_schema_inference={}, try_detect_header={}", + ", column_names_for_schema_inference={}, try_detect_header={}, skip_first_lines={}", settings.column_names_for_schema_inference, - settings.tsv.try_detect_header); + settings.tsv.try_detect_header, + settings.tsv.skip_first_lines); return result; }); } diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 1493779ec2d..c23d717d52f 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Processors/Formats/InputFormatErrorsLogger.cpp b/src/Processors/Formats/InputFormatErrorsLogger.cpp index 814c4679cf9..b5aa936df07 100644 --- a/src/Processors/Formats/InputFormatErrorsLogger.cpp +++ b/src/Processors/Formats/InputFormatErrorsLogger.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 6f779e7a8d4..02f7b6b3d12 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -303,9 +303,9 @@ public: IQueryPlanStep * getQueryPlanStep() const { return query_plan_step; } size_t getQueryPlanStepGroup() const { return query_plan_step_group; } - uint64_t getElapsedUs() const { return elapsed_us; } - uint64_t getInputWaitElapsedUs() const { return input_wait_elapsed_us; } - uint64_t getOutputWaitElapsedUs() const { return output_wait_elapsed_us; } + uint64_t getElapsedNs() const { return elapsed_ns; } + uint64_t getInputWaitElapsedNs() const { return input_wait_elapsed_ns; } + uint64_t getOutputWaitElapsedNs() const { return output_wait_elapsed_ns; } struct ProcessorDataStats { @@ -369,21 +369,21 @@ protected: private: /// For: - /// - elapsed_us + /// - elapsed_ns friend class ExecutionThreadContext; /// For - /// - input_wait_elapsed_us - /// - output_wait_elapsed_us + /// - input_wait_elapsed_ns + /// - output_wait_elapsed_ns friend class ExecutingGraph; std::string processor_description; /// For processors_profile_log - uint64_t elapsed_us = 0; + uint64_t elapsed_ns = 0; Stopwatch input_wait_watch; - uint64_t input_wait_elapsed_us = 0; + uint64_t input_wait_elapsed_ns = 0; Stopwatch output_wait_watch; - uint64_t output_wait_elapsed_us = 0; + uint64_t output_wait_elapsed_ns = 0; size_t stream_number = NO_STREAM; diff --git a/src/Processors/QueryPlan/BufferChunksTransform.cpp b/src/Processors/QueryPlan/BufferChunksTransform.cpp new file mode 100644 index 00000000000..3601a68d36e --- /dev/null +++ b/src/Processors/QueryPlan/BufferChunksTransform.cpp @@ -0,0 +1,85 @@ +#include + +namespace DB +{ + +BufferChunksTransform::BufferChunksTransform( + const Block & header_, + size_t max_rows_to_buffer_, + size_t max_bytes_to_buffer_, + size_t limit_) + : IProcessor({header_}, {header_}) + , input(inputs.front()) + , output(outputs.front()) + , max_rows_to_buffer(max_rows_to_buffer_) + , max_bytes_to_buffer(max_bytes_to_buffer_) + , limit(limit_) +{ +} + +IProcessor::Status BufferChunksTransform::prepare() +{ + if (output.isFinished()) + { + chunks = {}; + input.close(); + return Status::Finished; + } + + if (input.isFinished() && chunks.empty()) + { + output.finish(); + return Status::Finished; + } + + if (output.canPush()) + { + input.setNeeded(); + + if (!chunks.empty()) + { + auto chunk = std::move(chunks.front()); + chunks.pop(); + + num_buffered_rows -= chunk.getNumRows(); + num_buffered_bytes -= chunk.bytes(); + + output.push(std::move(chunk)); + } + else if (input.hasData()) + { + auto chunk = pullChunk(); + output.push(std::move(chunk)); + } + } + + if (input.hasData() && (num_buffered_rows < max_rows_to_buffer || num_buffered_bytes < max_bytes_to_buffer)) + { + auto chunk = pullChunk(); + num_buffered_rows += chunk.getNumRows(); + num_buffered_bytes += chunk.bytes(); + chunks.push(std::move(chunk)); + } + + if (num_buffered_rows >= max_rows_to_buffer && num_buffered_bytes >= max_bytes_to_buffer) + { + input.setNotNeeded(); + return Status::PortFull; + } + + input.setNeeded(); + return Status::NeedData; +} + +Chunk BufferChunksTransform::pullChunk() +{ + auto chunk = input.pull(); + num_processed_rows += chunk.getNumRows(); + + if (limit && num_processed_rows >= limit) + input.close(); + + return chunk; +} + +} diff --git a/src/Processors/QueryPlan/BufferChunksTransform.h b/src/Processors/QueryPlan/BufferChunksTransform.h new file mode 100644 index 00000000000..752f9910734 --- /dev/null +++ b/src/Processors/QueryPlan/BufferChunksTransform.h @@ -0,0 +1,42 @@ +#pragma once +#include +#include + +namespace DB +{ + +/// Transform that buffers chunks from the input +/// up to the certain limit and pushes chunks to +/// the output whenever it is ready. It can be used +/// to increase parallelism of execution, for example +/// when it is adeded before MergingSortedTransform. +class BufferChunksTransform : public IProcessor +{ +public: + /// OR condition is used for the limits on rows and bytes. + BufferChunksTransform( + const Block & header_, + size_t max_rows_to_buffer_, + size_t max_bytes_to_buffer_, + size_t limit_); + + Status prepare() override; + String getName() const override { return "BufferChunks"; } + +private: + Chunk pullChunk(); + + InputPort & input; + OutputPort & output; + + size_t max_rows_to_buffer; + size_t max_bytes_to_buffer; + size_t limit; + + std::queue chunks; + size_t num_buffered_rows = 0; + size_t num_buffered_bytes = 0; + size_t num_processed_rows = 0; +}; + +} diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h index 70ae4c6156e..27171511703 100644 --- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h +++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h @@ -1,4 +1,6 @@ #pragma once + +#include #include #include diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index 1f4f271fa6e..14f58585d3d 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index b1ab5561958..c48bdf1552a 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -46,6 +46,10 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes); /// Replace chain `FilterStep -> ExpressionStep` to single FilterStep size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &); +/// Replace chain `FilterStep -> FilterStep` to single FilterStep +/// Note: this breaks short-circuit logic, so it is disabled for now. +size_t tryMergeFilters(QueryPlan::Node * parent_node, QueryPlan::Nodes &); + /// Move FilterStep down if possible. /// May split FilterStep and push down only part of it. size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes); @@ -81,11 +85,12 @@ size_t tryAggregatePartitionsIndependently(QueryPlan::Node * node, QueryPlan::No inline const auto & getOptimizations() { - static const std::array optimizations = {{ + static const std::array optimizations = {{ {tryLiftUpArrayJoin, "liftUpArrayJoin", &QueryPlanOptimizationSettings::lift_up_array_join}, {tryPushDownLimit, "pushDownLimit", &QueryPlanOptimizationSettings::push_down_limit}, {trySplitFilter, "splitFilter", &QueryPlanOptimizationSettings::split_filter}, {tryMergeExpressions, "mergeExpressions", &QueryPlanOptimizationSettings::merge_expressions}, + {tryMergeFilters, "mergeFilters", &QueryPlanOptimizationSettings::merge_filters}, {tryPushDownFilter, "pushDownFilter", &QueryPlanOptimizationSettings::filter_push_down}, {tryConvertOuterJoinToInnerJoin, "convertOuterJoinToInnerJoin", &QueryPlanOptimizationSettings::convert_outer_join_to_inner_join}, {tryExecuteFunctionsAfterSorting, "liftUpFunctions", &QueryPlanOptimizationSettings::execute_functions_after_sorting}, diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp index 2738de1ff5f..4d984133efd 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp @@ -20,6 +20,8 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const settings.merge_expressions = from.query_plan_enable_optimizations && from.query_plan_merge_expressions; + settings.merge_filters = from.query_plan_enable_optimizations && from.query_plan_merge_filters; + settings.filter_push_down = from.query_plan_enable_optimizations && from.query_plan_filter_push_down; settings.convert_outer_join_to_inner_join = from.query_plan_enable_optimizations && from.query_plan_convert_outer_join_to_inner_join; diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h index 85042cea4ed..539ff2eafbb 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h @@ -31,6 +31,9 @@ struct QueryPlanOptimizationSettings /// If merge-expressions optimization is enabled. bool merge_expressions = true; + /// If merge-filters optimization is enabled. + bool merge_filters = false; + /// If filter push down optimization is enabled. bool filter_push_down = true; diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp index 6ace1b3b5ce..118abdd701f 100644 --- a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp +++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp @@ -34,7 +34,6 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) auto * parent_expr = typeid_cast(parent.get()); auto * parent_filter = typeid_cast(parent.get()); auto * child_expr = typeid_cast(child.get()); - auto * child_filter = typeid_cast(child.get()); if (parent_expr && child_expr) { @@ -76,7 +75,23 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) parent_node->children.swap(child_node->children); return 1; } - else if (parent_filter && child_filter) + + return 0; +} +size_t tryMergeFilters(QueryPlan::Node * parent_node, QueryPlan::Nodes &) +{ + if (parent_node->children.size() != 1) + return false; + + QueryPlan::Node * child_node = parent_node->children.front(); + + auto & parent = parent_node->step; + auto & child = child_node->step; + + auto * parent_filter = typeid_cast(parent.get()); + auto * child_filter = typeid_cast(child.get()); + + if (parent_filter && child_filter) { const auto & child_actions = child_filter->getExpression(); const auto & parent_actions = parent_filter->getExpression(); diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 1badd315200..3cb7f8a5d83 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -8,6 +9,7 @@ #include #include #include + namespace DB { diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index 537555afa2a..28eb4da2e17 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -22,9 +22,10 @@ #include #include #include -#include "Storages/KeyDescription.h" +#include #include #include +#include #include @@ -919,15 +920,23 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) { auto & union_node = node.children.front(); - std::vector infos; + bool use_buffering = false; const SortDescription * max_sort_descr = nullptr; + + std::vector infos; infos.reserve(node.children.size()); + for (auto * child : union_node->children) { infos.push_back(buildInputOrderInfo(*sorting, *child, steps_to_update)); - if (infos.back() && (!max_sort_descr || max_sort_descr->size() < infos.back()->sort_description_for_merging.size())) - max_sort_descr = &infos.back()->sort_description_for_merging; + if (infos.back()) + { + if (!max_sort_descr || max_sort_descr->size() < infos.back()->sort_description_for_merging.size()) + max_sort_descr = &infos.back()->sort_description_for_merging; + + use_buffering |= infos.back()->limit == 0; + } } if (!max_sort_descr || max_sort_descr->empty()) @@ -972,12 +981,13 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) } } - sorting->convertToFinishSorting(*max_sort_descr); + sorting->convertToFinishSorting(*max_sort_descr, use_buffering); } else if (auto order_info = buildInputOrderInfo(*sorting, *node.children.front(), steps_to_update)) { - sorting->convertToFinishSorting(order_info->sort_description_for_merging); - /// update data stream's sorting properties + /// Use buffering only if have filter or don't have limit. + bool use_buffering = order_info->limit == 0; + sorting->convertToFinishSorting(order_info->sort_description_for_merging, use_buffering); updateStepsDataStreams(steps_to_update); } } @@ -1091,7 +1101,7 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, bool can_read = read_from_merge_tree->requestReadingInOrder(order_info->used_prefix_of_sorting_key_size, order_info->direction, order_info->limit); if (!can_read) return 0; - sorting->convertToFinishSorting(order_info->sort_description_for_merging); + sorting->convertToFinishSorting(order_info->sort_description_for_merging, false); } return 0; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 70327bc95b4..9e2c2dcc96d 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -515,10 +516,11 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( if (!candidates.minmax_projection) { - auto it = std::find_if(agg_projections.begin(), agg_projections.end(), [&](const auto * projection) - { - return projection->name == context->getSettings().preferred_optimize_projection_name.value; - }); + auto it = std::find_if( + agg_projections.begin(), + agg_projections.end(), + [&](const auto * projection) + { return projection->name == context->getSettingsRef().preferred_optimize_projection_name.value; }); if (it != agg_projections.end()) { diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index 0af3869ccf1..291800c2c86 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -7,9 +7,11 @@ #include #include #include +#include #include #include #include + #include namespace DB::QueryPlanOptimizations @@ -110,10 +112,10 @@ std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod return {}; ContextPtr context = reading->getContext(); - auto it = std::find_if(normal_projections.begin(), normal_projections.end(), [&](const auto * projection) - { - return projection->name == context->getSettings().preferred_optimize_projection_name.value; - }); + auto it = std::find_if( + normal_projections.begin(), + normal_projections.end(), + [&](const auto * projection) { return projection->name == context->getSettingsRef().preferred_optimize_projection_name.value; }); if (it != normal_projections.end()) { diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index af1578d6af8..4d60a1135a6 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp index 2e7693b1b36..6dc0c021a14 100644 --- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp @@ -30,12 +30,15 @@ public: std::shared_ptr> parallel_execution_index_, InitializerFunc initializer_func_ = {}) : ISource(storage_snapshot->getSampleBlockForColumns(column_names_)) - , column_names_and_types(storage_snapshot->getColumnsByNames( + , requested_column_names_and_types(storage_snapshot->getColumnsByNames( GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withExtendedObjects(), column_names_)) , data(data_) , parallel_execution_index(parallel_execution_index_) , initializer_func(std::move(initializer_func_)) { + auto all_column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withExtendedObjects()); + for (const auto & [name, type] : all_column_names_and_types) + all_names_to_types[name] = type; } String getName() const override { return "Memory"; } @@ -59,17 +62,20 @@ protected: const Block & src = (*data)[current_index]; Columns columns; - size_t num_columns = column_names_and_types.size(); + size_t num_columns = requested_column_names_and_types.size(); columns.reserve(num_columns); - auto name_and_type = column_names_and_types.begin(); + auto name_and_type = requested_column_names_and_types.begin(); for (size_t i = 0; i < num_columns; ++i) { - columns.emplace_back(tryGetColumnFromBlock(src, *name_and_type)); + if (name_and_type->isSubcolumn()) + columns.emplace_back(tryGetSubcolumnFromBlock(src, all_names_to_types[name_and_type->getNameInStorage()], *name_and_type)); + else + columns.emplace_back(tryGetColumnFromBlock(src, *name_and_type)); ++name_and_type; } - fillMissingColumns(columns, src.rows(), column_names_and_types, column_names_and_types, {}, nullptr); + fillMissingColumns(columns, src.rows(), requested_column_names_and_types, requested_column_names_and_types, {}, nullptr); assert(std::all_of(columns.begin(), columns.end(), [](const auto & column) { return column != nullptr; })); return Chunk(std::move(columns), src.rows()); @@ -88,7 +94,9 @@ private: } } - const NamesAndTypesList column_names_and_types; + const NamesAndTypesList requested_column_names_and_types; + /// Map (name -> type) for all columns from the storage header. + std::unordered_map all_names_to_types; size_t execution_index = 0; std::shared_ptr data; std::shared_ptr> parallel_execution_index; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 433dd4beee8..6e86d14f5cc 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -383,7 +385,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); pipes.emplace_back(std::move(source)); } @@ -482,7 +484,7 @@ Pipe ReadFromMergeTree::readFromPool( pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); if (i == 0) source->addTotalRowsApprox(total_rows); @@ -595,7 +597,7 @@ Pipe ReadFromMergeTree::readInOrder( processor->addPartLevelToChunk(isQueryWithFinal()); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); if (set_total_rows_approx) source->addTotalRowsApprox(total_rows); diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 84c2515e8ca..fe724a8a198 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp b/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp index 4a257bba922..51117ad3c9b 100644 --- a/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp +++ b/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp @@ -1,5 +1,6 @@ #include +#include #include #include diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index eb974259c5e..a3ae035afdd 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 8f40e523b42..48fad9f5fdb 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -1,5 +1,3 @@ -#include -#include #include #include #include @@ -8,12 +6,16 @@ #include #include #include +#include #include #include +#include #include #include +#include + namespace CurrentMetrics { extern const Metric TemporaryFilesForSort; @@ -38,6 +40,7 @@ SortingStep::Settings::Settings(const Context & context) tmp_data = context.getTempDataOnDisk(); min_free_disk_space = settings.min_free_disk_space_for_temporary_data; max_block_bytes = settings.prefer_external_sort_block_bytes; + read_in_order_use_buffering = settings.read_in_order_use_buffering; } SortingStep::Settings::Settings(size_t max_block_size_) @@ -153,10 +156,11 @@ void SortingStep::updateLimit(size_t limit_) } } -void SortingStep::convertToFinishSorting(SortDescription prefix_description_) +void SortingStep::convertToFinishSorting(SortDescription prefix_description_, bool use_buffering_) { type = Type::FinishSorting; prefix_description = std::move(prefix_description_); + use_buffering = use_buffering_; } void SortingStep::scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline) @@ -244,6 +248,14 @@ void SortingStep::mergingSorted(QueryPipelineBuilder & pipeline, const SortDescr /// If there are several streams, then we merge them into one if (pipeline.getNumStreams() > 1) { + if (use_buffering && sort_settings.read_in_order_use_buffering) + { + pipeline.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, sort_settings.max_block_size, sort_settings.max_block_bytes, limit_); + }); + } + auto transform = std::make_shared( pipeline.getHeader(), pipeline.getNumStreams(), @@ -373,9 +385,8 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build mergingSorted(pipeline, prefix_description, (need_finish_sorting ? 0 : limit)); if (need_finish_sorting) - { finishSorting(pipeline, prefix_description, result_description, limit); - } + return; } diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index 49dcf9f3121..b4a49394a13 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -28,6 +28,7 @@ public: TemporaryDataOnDiskScopePtr tmp_data = nullptr; size_t min_free_disk_space = 0; size_t max_block_bytes = 0; + size_t read_in_order_use_buffering = 0; explicit Settings(const Context & context); explicit Settings(size_t max_block_size_); @@ -80,7 +81,7 @@ public: const SortDescription & getSortDescription() const { return result_description; } - void convertToFinishSorting(SortDescription prefix_description); + void convertToFinishSorting(SortDescription prefix_description, bool use_buffering_); Type getType() const { return type; } const Settings & getSettings() const { return sort_settings; } @@ -126,6 +127,7 @@ private: UInt64 limit; bool always_read_till_end = false; + bool use_buffering = false; Settings sort_settings; diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index ac5e144bf4a..10384f6296a 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp index 985a82a7b17..5d533a7747e 100644 --- a/src/Processors/Sources/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -2,7 +2,9 @@ #if USE_MYSQL #include + #include +#include #include #include #include diff --git a/src/Processors/Sources/MySQLSource.h b/src/Processors/Sources/MySQLSource.h index 4ae5af22dab..b99e48a7759 100644 --- a/src/Processors/Sources/MySQLSource.h +++ b/src/Processors/Sources/MySQLSource.h @@ -6,11 +6,12 @@ #include #include #include -#include namespace DB { +struct Settings; + struct StreamSettings { /// Check if setting is enabled, otherwise use common `max_block_size` setting. diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index 37a84d9fe96..a3d6fd691d8 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -193,7 +193,15 @@ PostgreSQLSource::~PostgreSQLSource() { if (stream) { + /** Internally libpqxx::stream_from runs PostgreSQL copy query `COPY query TO STDOUT`. + * During transaction abort we try to execute PostgreSQL `ROLLBACK` command and if + * copy query is not cancelled, we wait until it finishes. + */ tx->conn().cancel_query(); + + /** If stream is not closed, libpqxx::stream_from closes stream in destructor, but that way + * exception is added into transaction pending error and we can potentially ignore exception message. + */ stream->close(); } diff --git a/src/Processors/Sources/RecursiveCTESource.cpp b/src/Processors/Sources/RecursiveCTESource.cpp index 221198c622a..40e72e9cbb1 100644 --- a/src/Processors/Sources/RecursiveCTESource.cpp +++ b/src/Processors/Sources/RecursiveCTESource.cpp @@ -17,6 +17,8 @@ #include #include +#include + namespace DB { diff --git a/src/Processors/TTL/TTLAggregationAlgorithm.cpp b/src/Processors/TTL/TTLAggregationAlgorithm.cpp index 45e8a96412e..2d7a37d0abe 100644 --- a/src/Processors/TTL/TTLAggregationAlgorithm.cpp +++ b/src/Processors/TTL/TTLAggregationAlgorithm.cpp @@ -1,5 +1,6 @@ -#include +#include #include +#include namespace DB { diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 65f0612d738..cdbe194cfac 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -783,7 +783,7 @@ void AggregatingTransform::initGenerate() { /// Just a reasonable constant, matches default value for the setting `preferred_block_size_bytes` static constexpr size_t oneMB = 1024 * 1024; - return std::make_shared(header, params->params.max_block_size, oneMB); + return std::make_shared(header, params->params.max_block_size, oneMB); }); } /// AggregatingTransform::expandPipeline expects single output port. diff --git a/src/Processors/Transforms/CheckConstraintsTransform.cpp b/src/Processors/Transforms/CheckConstraintsTransform.cpp index e43aa6028da..cdae8c23a3e 100644 --- a/src/Processors/Transforms/CheckConstraintsTransform.cpp +++ b/src/Processors/Transforms/CheckConstraintsTransform.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB @@ -31,6 +32,7 @@ CheckConstraintsTransform::CheckConstraintsTransform( , table_id(table_id_) , constraints_to_check(constraints_.filterConstraints(ConstraintsDescription::ConstraintType::CHECK)) , expressions(constraints_.getExpressions(context_, header.getNamesAndTypesList())) + , context(std::move(context_)) { } @@ -39,6 +41,10 @@ void CheckConstraintsTransform::onConsume(Chunk chunk) { if (chunk.getNumRows() > 0) { + if (rows_written == 0) + for (const auto & expression : expressions) + VirtualColumnUtils::buildSetsForDAG(expression->getActionsDAG(), context); + Block block_to_calculate = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); for (size_t i = 0; i < expressions.size(); ++i) { diff --git a/src/Processors/Transforms/CheckConstraintsTransform.h b/src/Processors/Transforms/CheckConstraintsTransform.h index 09833ff396b..f92d0ab855e 100644 --- a/src/Processors/Transforms/CheckConstraintsTransform.h +++ b/src/Processors/Transforms/CheckConstraintsTransform.h @@ -35,6 +35,7 @@ private: StorageID table_id; const ASTs constraints_to_check; const ConstraintsExpressions expressions; + ContextPtr context; size_t rows_written = 0; Chunk cur_chunk; }; diff --git a/src/Processors/Transforms/DistinctSortedChunkTransform.h b/src/Processors/Transforms/DistinctSortedChunkTransform.h index 188e3d5c4c7..cdb20410a5b 100644 --- a/src/Processors/Transforms/DistinctSortedChunkTransform.h +++ b/src/Processors/Transforms/DistinctSortedChunkTransform.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Processors/Transforms/DistinctSortedTransform.h b/src/Processors/Transforms/DistinctSortedTransform.h index 86c5c968e4b..52819f1afd7 100644 --- a/src/Processors/Transforms/DistinctSortedTransform.h +++ b/src/Processors/Transforms/DistinctSortedTransform.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 34b733cde5e..b5a40c75c5b 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -7,6 +7,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; +extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; } SquashingTransform::SquashingTransform( @@ -56,53 +57,170 @@ void SquashingTransform::work() } } -SimpleSquashingTransform::SimpleSquashingTransform( +SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ISimpleTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) + : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes) { } -void SimpleSquashingTransform::transform(Chunk & chunk) +void SimpleSquashingChunksTransform::consume(Chunk chunk) { - if (!finished) - { - Chunk planned_chunk = squashing.add(std::move(chunk)); - if (planned_chunk.hasChunkInfo()) - chunk = DB::Squashing::squash(std::move(planned_chunk)); - } - else - { - if (chunk.hasRows()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - - chunk = squashing.flush(); - if (chunk.hasChunkInfo()) - chunk = DB::Squashing::squash(std::move(chunk)); - } + Block current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); } -IProcessor::Status SimpleSquashingTransform::prepare() +Chunk SimpleSquashingChunksTransform::generate() { - if (!finished && input.isFinished()) + if (squashed_chunk.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); + + return std::move(squashed_chunk); +} + +bool SimpleSquashingChunksTransform::canGenerate() +{ + return !squashed_chunk.empty(); +} + +Chunk SimpleSquashingChunksTransform::getRemaining() +{ + Block current_block = squashing.add({}); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); + return std::move(squashed_chunk); +} + +SquashingLegacy::SquashingLegacy(size_t min_block_size_rows_, size_t min_block_size_bytes_) + : min_block_size_rows(min_block_size_rows_) + , min_block_size_bytes(min_block_size_bytes_) +{ +} + +Block SquashingLegacy::add(Block && input_block) +{ + return addImpl(std::move(input_block)); +} + +Block SquashingLegacy::add(const Block & input_block) +{ + return addImpl(input_block); +} + +/* + * To minimize copying, accept two types of argument: const reference for output + * stream, and rvalue reference for input stream, and decide whether to copy + * inside this function. This allows us not to copy Block unless we absolutely + * have to. + */ +template +Block SquashingLegacy::addImpl(ReferenceType input_block) +{ + /// End of input stream. + if (!input_block) { - if (output.isFinished()) - return Status::Finished; + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; + } - if (!output.canPush()) - return Status::PortFull; - - if (has_output) + /// Just read block is already enough. + if (isEnoughSize(input_block)) + { + /// If no accumulated data, return just read block. + if (!accumulated_block) { - output.pushData(std::move(output_data)); - has_output = false; - return Status::PortFull; + return std::move(input_block); } - finished = true; - /// On the next call to transform() we will return all data buffered in `squashing` (if any) - return Status::Ready; + /// Return accumulated data (maybe it has small size) and place new block to accumulated data. + Block to_return = std::move(input_block); + std::swap(to_return, accumulated_block); + return to_return; } - return ISimpleTransform::prepare(); + + /// Accumulated block is already enough. + if (isEnoughSize(accumulated_block)) + { + /// Return accumulated data and place new block to accumulated data. + Block to_return = std::move(input_block); + std::swap(to_return, accumulated_block); + return to_return; + } + + append(std::move(input_block)); + if (isEnoughSize(accumulated_block)) + { + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; + } + + /// Squashed block is not ready. + return {}; } + + +template +void SquashingLegacy::append(ReferenceType input_block) +{ + if (!accumulated_block) + { + accumulated_block = std::move(input_block); + return; + } + + assert(blocksHaveEqualStructure(input_block, accumulated_block)); + + try + { + for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) + { + const auto source_column = input_block.getByPosition(i).column; + + auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); + mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); + accumulated_block.getByPosition(i).column = std::move(mutable_column); + } + } + catch (...) + { + /// add() may be called again even after a previous add() threw an exception. + /// Keep accumulated_block in a valid state. + /// Seems ok to discard accumulated data because we're throwing an exception, which the caller will + /// hopefully interpret to mean "this block and all *previous* blocks are potentially lost". + accumulated_block.clear(); + throw; + } +} + + +bool SquashingLegacy::isEnoughSize(const Block & block) +{ + size_t rows = 0; + size_t bytes = 0; + + for (const auto & [column, type, name] : block) + { + if (!column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid column in block."); + + if (!rows) + rows = column->size(); + else if (rows != column->size()) + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Sizes of columns doesn't match"); + + bytes += column->byteSize(); + } + + return isEnoughSize(rows, bytes); +} + + +bool SquashingLegacy::isEnoughSize(size_t rows, size_t bytes) const +{ + return (!min_block_size_rows && !min_block_size_bytes) + || (min_block_size_rows && rows >= min_block_size_rows) + || (min_block_size_bytes && bytes >= min_block_size_bytes); +} + + } diff --git a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h index c5b727ac6ec..452317e7d5e 100644 --- a/src/Processors/Transforms/SquashingTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -29,22 +30,51 @@ private: Chunk finish_chunk; }; -/// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. -class SimpleSquashingTransform : public ISimpleTransform + +class SquashingLegacy { public: - explicit SimpleSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + /// Conditions on rows and bytes are OR-ed. If one of them is zero, then corresponding condition is ignored. + SquashingLegacy(size_t min_block_size_rows_, size_t min_block_size_bytes_); + + /** Add next block and possibly returns squashed block. + * At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. + */ + Block add(Block && block); + Block add(const Block & block); + +private: + size_t min_block_size_rows; + size_t min_block_size_bytes; + + Block accumulated_block; + + template + Block addImpl(ReferenceType block); + + template + void append(ReferenceType block); + + bool isEnoughSize(const Block & block); + bool isEnoughSize(size_t rows, size_t bytes) const; +}; + +class SimpleSquashingChunksTransform : public IInflatingTransform +{ +public: + explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "SimpleSquashingTransform"; } protected: - void transform(Chunk &) override; - - IProcessor::Status prepare() override; + void consume(Chunk chunk) override; + bool canGenerate() override; + Chunk generate() override; + Chunk getRemaining() override; private: - Squashing squashing; - - bool finished = false; + SquashingLegacy squashing; + Chunk squashed_chunk; }; + } diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index aa86879e62c..3f278ca884f 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index b9f61d30182..a694fa43e46 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -17,6 +17,9 @@ #include #include +#include +#include + #include @@ -71,6 +74,9 @@ public: size_t function_index) const = 0; virtual std::optional getDefaultFrame() const { return {}; } + + /// Is the frame type supported by this function. + virtual bool checkWindowFrameType(const WindowTransform * /*transform*/) const { return true; } }; // Compares ORDER BY column values at given rows to find the boundaries of frame: @@ -402,6 +408,19 @@ WindowTransform::WindowTransform(const Block & input_header_, } } } + + for (const auto & workspace : workspaces) + { + if (workspace.window_function_impl) + { + if (!workspace.window_function_impl->checkWindowFrameType(this)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported window frame type for function '{}'", + workspace.aggregate_function->getName()); + } + } + + } } WindowTransform::~WindowTransform() @@ -1609,6 +1628,34 @@ struct WindowFunctionHelpers { recurrent_detail::setValueToOutputColumn(transform, function_index, value); } + + ALWAYS_INLINE static bool checkPartitionEnterFirstRow(const WindowTransform * transform) { return transform->current_row_number == 1; } + + ALWAYS_INLINE static bool checkPartitionEnterLastRow(const WindowTransform * transform) + { + /// This is for fast check. + if (!transform->partition_ended) + return false; + + auto current_row = transform->current_row; + /// checkPartitionEnterLastRow is called on each row, also move on current_row.row here. + current_row.row++; + const auto & partition_end_row = transform->partition_end; + + /// The partition end is reached, when following is true + /// - current row is the partition end row, + /// - or current row is the last row of all input. + if (current_row != partition_end_row) + { + /// when current row is not the partition end row, we need to check whether it's the last + /// input row. + if (current_row.row < transform->blockRowsNumber(current_row)) + return false; + if (partition_end_row.block != current_row.block + 1 || partition_end_row.row) + return false; + } + return true; + } }; template @@ -2058,8 +2105,6 @@ namespace const WindowTransform * transform, size_t function_index, const DataTypes & argument_types); - - static void checkWindowFrameType(const WindowTransform * transform); }; } @@ -2080,6 +2125,29 @@ struct WindowFunctionNtile final : public StatefulWindowFunction bool allocatesMemoryInArena() const override { return false; } + bool checkWindowFrameType(const WindowTransform * transform) const override + { + if (transform->order_by_indices.empty()) + { + LOG_ERROR(getLogger("WindowFunctionNtile"), "Window frame for 'ntile' function must have ORDER BY clause"); + return false; + } + + // We must wait all for the partition end and get the total rows number in this + // partition. So before the end of this partition, there is no any block could be + // dropped out. + bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded + && transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded; + if (!is_frame_supported) + { + LOG_ERROR( + getLogger("WindowFunctionNtile"), + "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); + return false; + } + return true; + } + std::optional getDefaultFrame() const override { WindowFrame frame; @@ -2106,7 +2174,6 @@ namespace { if (!buckets) [[unlikely]] { - checkWindowFrameType(transform); const auto & current_block = transform->blockAt(transform->current_row); const auto & workspace = transform->workspaces[function_index]; const auto & arg_col = *current_block.original_input_columns[workspace.argument_column_indices[0]]; @@ -2128,7 +2195,7 @@ namespace } } // new partition - if (transform->current_row_number == 1) [[unlikely]] + if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) [[unlikely]] { current_partition_rows = 0; current_partition_inserted_row = 0; @@ -2137,25 +2204,9 @@ namespace current_partition_rows++; // Only do the action when we meet the last row in this partition. - if (!transform->partition_ended) + if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform)) return; - else - { - auto current_row = transform->current_row; - current_row.row++; - const auto & end_row = transform->partition_end; - if (current_row != end_row) - { - if (current_row.row < transform->blockRowsNumber(current_row)) - return; - if (end_row.block != current_row.block + 1 || end_row.row) - { - return; - } - // else, current_row is the last input row. - } - } auto bucket_capacity = current_partition_rows / buckets; auto capacity_diff = current_partition_rows - bucket_capacity * buckets; @@ -2193,23 +2244,115 @@ namespace bucket_num += 1; } } +} - void NtileState::checkWindowFrameType(const WindowTransform * transform) +namespace +{ +struct PercentRankState +{ + RowNumber start_row; + UInt64 current_partition_rows = 0; +}; +} + +struct WindowFunctionPercentRank final : public StatefulWindowFunction +{ +public: + WindowFunctionPercentRank(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + {} + + bool allocatesMemoryInArena() const override { return false; } + + bool checkWindowFrameType(const WindowTransform * transform) const override { - if (transform->order_by_indices.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for 'ntile' function must have ORDER BY clause"); + if (transform->window_description.frame.type != WindowFrame::FrameType::RANGE + || transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded + || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Current) + { + LOG_ERROR( + getLogger("WindowFunctionPercentRank"), + "Window frame for function 'percent_rank' should be 'RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT'"); + return false; + } + return true; + } - // We must wait all for the partition end and get the total rows number in this - // partition. So before the end of this partition, there is no any block could be - // dropped out. - bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded - && transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded; - if (!is_frame_supported) + std::optional getDefaultFrame() const override + { + WindowFrame frame; + frame.type = WindowFrame::FrameType::RANGE; + frame.begin_type = WindowFrame::BoundaryType::Unbounded; + frame.end_type = WindowFrame::BoundaryType::Current; + return frame; + } + + void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const override + { + auto & state = getWorkspaceState(transform, function_index); + if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); + state.current_partition_rows = 0; + state.start_row = transform->current_row; + } + + insertRankIntoColumn(transform, function_index); + state.current_partition_rows++; + + if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform)) + { + return; + } + + UInt64 remaining_rows = state.current_partition_rows; + Float64 percent_rank_denominator = remaining_rows == 1 ? 1 : remaining_rows - 1; + + while (remaining_rows > 0) + { + auto block_rows_number = transform->blockRowsNumber(state.start_row); + auto available_block_rows = block_rows_number - state.start_row.row; + if (available_block_rows <= remaining_rows) + { + /// This partition involves multiple blocks. Finish current block and move on to the + /// next block. + auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index]; + auto & data = assert_cast(to_column).getData(); + for (size_t i = state.start_row.row; i < block_rows_number; ++i) + data[i] = (data[i] - 1) / percent_rank_denominator; + + state.start_row.block++; + state.start_row.row = 0; + remaining_rows -= available_block_rows; + } + else + { + /// The partition ends in current block.s + auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index]; + auto & data = assert_cast(to_column).getData(); + for (size_t i = state.start_row.row, n = state.start_row.row + remaining_rows; i < n; ++i) + { + data[i] = (data[i] - 1) / percent_rank_denominator; + } + state.start_row.row += remaining_rows; + remaining_rows = 0; + } } } -} + + + inline PercentRankState & getWorkspaceState(const WindowTransform * transform, size_t function_index) const + { + const auto & workspace = transform->workspaces[function_index]; + return getState(workspace); + } + + inline void insertRankIntoColumn(const WindowTransform * transform, size_t function_index) const + { + auto & to_column = *transform->blockAt(transform->current_row).output_columns[function_index]; + assert_cast(to_column).getData().push_back(static_cast(transform->peer_group_start_row_number)); + } +}; // ClickHouse-specific variant of lag/lead that respects the window frame. template @@ -2582,6 +2725,13 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) parameters); }, properties}, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("percent_rank", {[](const std::string & name, + const DataTypes & argument_types, const Array & parameters, const Settings *) + { + return std::make_shared(name, argument_types, + parameters); + }, properties}, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("row_number", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 25fbf13b0e7..c616c882ed9 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp index a2a42f27c3f..c33e87815b1 100644 --- a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp +++ b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp @@ -14,7 +14,8 @@ #include #include #include "IO/CompressionMethod.h" -#include "Parsers/ASTLiteral.h" +#include +#include namespace DB diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 1686a101bde..b08f2002f64 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -104,8 +105,16 @@ RemoteQueryExecutor::RemoteQueryExecutor( connection_entries.emplace_back(std::move(result.entry)); } + else + { + chassert(!fail_message.empty()); + if (result.entry.isNull()) + LOG_DEBUG(log, "Failed to connect to replica {}. {}", pool->getAddress(), fail_message); + else + LOG_DEBUG(log, "Replica is not usable for remote query execution: {}. {}", pool->getAddress(), fail_message); + } - auto res = std::make_unique(std::move(connection_entries), current_settings, throttler); + auto res = std::make_unique(std::move(connection_entries), context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); @@ -127,7 +136,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( { create_connections = [this, &connection, throttler, extension_](AsyncCallback) { - auto res = std::make_unique(connection, context->getSettingsRef(), throttler); + auto res = std::make_unique(connection, context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); return res; @@ -148,7 +157,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( { create_connections = [this, connection_ptr, throttler, extension_](AsyncCallback) { - auto res = std::make_unique(connection_ptr, context->getSettingsRef(), throttler); + auto res = std::make_unique(connection_ptr, context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); return res; @@ -169,7 +178,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( { create_connections = [this, connections_, throttler, extension_](AsyncCallback) mutable { - auto res = std::make_unique(std::move(connections_), context->getSettingsRef(), throttler); + auto res = std::make_unique(std::move(connections_), context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); return res; @@ -234,7 +243,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( timeouts, current_settings, pool_mode, std::move(async_callback), skip_unavailable_endpoints, priority_func); } - auto res = std::make_unique(std::move(connection_entries), current_settings, throttler); + auto res = std::make_unique(std::move(connection_entries), context, throttler); if (extension && extension->replica_info) res->setReplicaInfo(*extension->replica_info); return res; @@ -899,4 +908,10 @@ void RemoteQueryExecutor::setProfileInfoCallback(ProfileInfoCallback callback) std::lock_guard guard(was_cancelled_mutex); profile_info_callback = std::move(callback); } + +bool RemoteQueryExecutor::needToSkipUnavailableShard() const +{ + return context->getSettingsRef().skip_unavailable_shards && (0 == connections->size()); +} + } diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index 6b1539bd08e..04a59cc3b7e 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -218,7 +218,7 @@ public: IConnections & getConnections() { return *connections; } - bool needToSkipUnavailableShard() const { return context->getSettingsRef().skip_unavailable_shards && (0 == connections->size()); } + bool needToSkipUnavailableShard() const; bool isReplicaUnavailable() const { return extension && extension->parallel_reading_coordinator && connections->size() == 0; } diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 10b59751b22..d8a4d7f0e1f 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -1082,8 +1083,8 @@ namespace read_buffer = wrapReadBufferWithCompressionMethod(std::move(read_buffer), input_compression_method); assert(!pipeline); - auto source = query_context->getInputFormat( - input_format, *read_buffer, header, query_context->getSettings().max_insert_block_size); + auto source + = query_context->getInputFormat(input_format, *read_buffer, header, query_context->getSettingsRef().max_insert_block_size); pipeline = std::make_unique(std::move(source)); pipeline_executor = std::make_unique(*pipeline); @@ -1151,8 +1152,7 @@ namespace external_table_context->applySettingsChanges(settings_changes); } auto in = external_table_context->getInputFormat( - format, *buf, metadata_snapshot->getSampleBlock(), - external_table_context->getSettings().max_insert_block_size); + format, *buf, metadata_snapshot->getSampleBlock(), external_table_context->getSettingsRef().max_insert_block_size); QueryPipelineBuilder cur_pipeline; cur_pipeline.init(Pipe(std::move(in))); @@ -1735,10 +1735,19 @@ namespace class GRPCServer::Runner { public: - explicit Runner(GRPCServer & owner_) : owner(owner_) {} + explicit Runner(GRPCServer & owner_) : owner(owner_), log(owner.log) {} ~Runner() { + try + { + stop(); + } + catch (...) + { + tryLogCurrentException(log, "~Runner"); + } + if (queue_thread.joinable()) queue_thread.join(); } @@ -1756,13 +1765,27 @@ public: } catch (...) { - tryLogCurrentException("GRPCServer"); + tryLogCurrentException(log, "run"); } }; queue_thread = ThreadFromGlobalPool{runner_function}; } - void stop() { stopReceivingNewCalls(); } + void stop() + { + std::lock_guard lock{mutex}; + should_stop = true; + + if (current_calls.empty()) + { + /// If there are no current calls then we call shutdownQueue() to signal the queue to stop waiting for next events. + /// The following line will make CompletionQueue::Next() stop waiting if the queue is empty and return false instead. + shutdownQueue(); + + /// If there are some current calls then we can't call shutdownQueue() right now because we want to let the current calls finish. + /// In this case function shutdownQueue() will be called later in run(). + } + } size_t getNumCurrentCalls() const { @@ -1789,12 +1812,6 @@ private: [this, call_type](bool ok) { onNewCall(call_type, ok); }); } - void stopReceivingNewCalls() - { - std::lock_guard lock{mutex}; - should_stop = true; - } - void onNewCall(CallType call_type, bool responder_started_ok) { std::lock_guard lock{mutex}; @@ -1827,38 +1844,47 @@ private: void run() { setThreadName("GRPCServerQueue"); - while (true) + + bool ok = false; + void * tag = nullptr; + + while (owner.queue->Next(&tag, &ok)) { - { - std::lock_guard lock{mutex}; - finished_calls.clear(); /// Destroy finished calls. - - /// If (should_stop == true) we continue processing until there is no active calls. - if (should_stop && current_calls.empty()) - { - bool all_responders_gone = std::all_of( - responders_for_new_calls.begin(), responders_for_new_calls.end(), - [](std::unique_ptr & responder) { return !responder; }); - if (all_responders_gone) - break; - } - } - - bool ok = false; - void * tag = nullptr; - if (!owner.queue->Next(&tag, &ok)) - { - /// Queue shutted down. - break; - } - auto & callback = *static_cast(tag); callback(ok); + + std::lock_guard lock{mutex}; + finished_calls.clear(); /// Destroy finished calls. + + /// If (should_stop == true) we continue processing while there are current calls. + if (should_stop && current_calls.empty()) + shutdownQueue(); } + + /// CompletionQueue::Next() returns false if the queue is fully drained and shut down. + } + + /// Shutdown the queue if that isn't done yet. + void shutdownQueue() + { + chassert(should_stop); + if (queue_is_shut_down) + return; + + queue_is_shut_down = true; + + /// Server should be shut down before CompletionQueue. + if (owner.grpc_server) + owner.grpc_server->Shutdown(); + + if (owner.queue) + owner.queue->Shutdown(); } GRPCServer & owner; + LoggerRawPtr log; ThreadFromGlobalPool queue_thread; + bool queue_is_shut_down = false; std::vector> responders_for_new_calls; std::map> current_calls; std::vector> finished_calls; @@ -1876,16 +1902,6 @@ GRPCServer::GRPCServer(IServer & iserver_, const Poco::Net::SocketAddress & addr GRPCServer::~GRPCServer() { - /// Server should be shutdown before CompletionQueue. - if (grpc_server) - grpc_server->Shutdown(); - - /// Completion Queue should be shutdown before destroying the runner, - /// because the runner is now probably executing CompletionQueue::Next() on queue_thread - /// which is blocked until an event is available or the queue is shutting down. - if (queue) - queue->Shutdown(); - runner.reset(); } diff --git a/src/Server/HTTP/authenticateUserByHTTP.cpp b/src/Server/HTTP/authenticateUserByHTTP.cpp new file mode 100644 index 00000000000..ac43bfd64c0 --- /dev/null +++ b/src/Server/HTTP/authenticateUserByHTTP.cpp @@ -0,0 +1,265 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#if USE_SSL +#include +#endif + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int AUTHENTICATION_FAILED; + extern const int BAD_ARGUMENTS; + extern const int SUPPORT_IS_DISABLED; +} + + +namespace +{ + /// Throws an exception that multiple authorization schemes are used simultaneously. + [[noreturn]] void throwMultipleAuthenticationMethods(std::string_view method1, std::string_view method2) + { + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Invalid authentication: it is not allowed to use {} and {} simultaneously", method1, method2); + } + + /// Checks that a specified user name is not empty, and throws an exception if it's empty. + void checkUserNameNotEmpty(const String & user_name, std::string_view method) + { + if (user_name.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Got an empty user name from {}", method); + } +} + + +bool authenticateUserByHTTP( + const HTTPServerRequest & request, + const HTMLForm & params, + HTTPServerResponse & response, + Session & session, + std::unique_ptr & request_credentials, + ContextPtr global_context, + LoggerPtr log) +{ + /// Get the credentials created by the previous call of authenticateUserByHTTP() while handling the previous HTTP request. + auto current_credentials = std::move(request_credentials); + + /// The user and password can be passed by headers (similar to X-Auth-*), + /// which is used by load balancers to pass authentication information. + std::string user = request.get("X-ClickHouse-User", ""); + std::string password = request.get("X-ClickHouse-Key", ""); + std::string quota_key = request.get("X-ClickHouse-Quota", ""); + bool has_auth_headers = !user.empty() || !password.empty(); + + /// The header 'X-ClickHouse-SSL-Certificate-Auth: on' enables checking the common name + /// extracted from the SSL certificate used for this connection instead of checking password. + bool has_ssl_certificate_auth = (request.get("X-ClickHouse-SSL-Certificate-Auth", "") == "on"); + + /// User name and password can be passed using HTTP Basic auth or query parameters + /// (both methods are insecure). + bool has_http_credentials = request.hasCredentials(); + bool has_credentials_in_query_params = params.has("user") || params.has("password"); + + std::string spnego_challenge; + SSLCertificateSubjects certificate_subjects; + + if (has_ssl_certificate_auth) + { +#if USE_SSL + /// For SSL certificate authentication we extract the user name from the "X-ClickHouse-User" HTTP header. + checkUserNameNotEmpty(user, "X-ClickHouse HTTP headers"); + + /// It is prohibited to mix different authorization schemes. + if (!password.empty()) + throwMultipleAuthenticationMethods("SSL certificate authentication", "authentication via password"); + if (has_http_credentials) + throwMultipleAuthenticationMethods("SSL certificate authentication", "Authorization HTTP header"); + if (has_credentials_in_query_params) + throwMultipleAuthenticationMethods("SSL certificate authentication", "authentication via parameters"); + + if (request.havePeerCertificate()) + certificate_subjects = extractSSLCertificateSubjects(request.peerCertificate()); + + if (certificate_subjects.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name or Subject Alternative Name"); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "SSL certificate authentication disabled because ClickHouse was built without SSL library"); +#endif + } + else if (has_auth_headers) + { + checkUserNameNotEmpty(user, "X-ClickHouse HTTP headers"); + + /// It is prohibited to mix different authorization schemes. + if (has_http_credentials) + throwMultipleAuthenticationMethods("X-ClickHouse HTTP headers", "Authorization HTTP header"); + if (has_credentials_in_query_params) + throwMultipleAuthenticationMethods("X-ClickHouse HTTP headers", "authentication via parameters"); + } + else if (has_http_credentials) + { + /// It is prohibited to mix different authorization schemes. + if (has_credentials_in_query_params) + throwMultipleAuthenticationMethods("Authorization HTTP header", "authentication via parameters"); + + std::string scheme; + std::string auth_info; + request.getCredentials(scheme, auth_info); + + if (Poco::icompare(scheme, "Basic") == 0) + { + Poco::Net::HTTPBasicCredentials credentials(auth_info); + user = credentials.getUsername(); + password = credentials.getPassword(); + checkUserNameNotEmpty(user, "Authorization HTTP header"); + } + else if (Poco::icompare(scheme, "Negotiate") == 0) + { + spnego_challenge = auth_info; + + if (spnego_challenge.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: SPNEGO challenge is empty"); + } + else + { + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme); + } + } + else + { + /// If the user name is not set we assume it's the 'default' user. + user = params.get("user", "default"); + password = params.get("password", ""); + checkUserNameNotEmpty(user, "authentication via parameters"); + } + + if (!certificate_subjects.empty()) + { + chassert(!user.empty()); + if (!current_credentials) + current_credentials = std::make_unique(user, std::move(certificate_subjects)); + + auto * certificate_credentials = dynamic_cast(current_credentials.get()); + if (!certificate_credentials) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected SSL certificate authorization scheme"); + } + else if (!spnego_challenge.empty()) + { + if (!current_credentials) + current_credentials = global_context->makeGSSAcceptorContext(); + + auto * gss_acceptor_context = dynamic_cast(current_credentials.get()); + if (!gss_acceptor_context) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: unexpected 'Negotiate' HTTP Authorization scheme expected"); + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" + const auto spnego_response = base64Encode(gss_acceptor_context->processToken(base64Decode(spnego_challenge), log)); +#pragma clang diagnostic pop + + if (!spnego_response.empty()) + response.set("WWW-Authenticate", "Negotiate " + spnego_response); + + if (!gss_acceptor_context->isFailed() && !gss_acceptor_context->isReady()) + { + if (spnego_response.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: 'Negotiate' HTTP Authorization failure"); + + response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); + response.send(); + /// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request. + request_credentials = std::move(current_credentials); + return false; + } + } + else // I.e., now using user name and password strings ("Basic"). + { + if (!current_credentials) + current_credentials = std::make_unique(); + + auto * basic_credentials = dynamic_cast(current_credentials.get()); + if (!basic_credentials) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected 'Basic' HTTP Authorization scheme"); + + chassert(!user.empty()); + basic_credentials->setUserName(user); + basic_credentials->setPassword(password); + } + + if (params.has("quota_key")) + { + if (!quota_key.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Invalid authentication: it is not allowed " + "to use quota key as HTTP header and as parameter simultaneously"); + + quota_key = params.get("quota_key"); + } + + /// Set client info. It will be used for quota accounting parameters in 'setUser' method. + + session.setHTTPClientInfo(request); + session.setQuotaClientKey(quota_key); + + /// Extract the last entry from comma separated list of forwarded_for addresses. + /// Only the last proxy can be trusted (if any). + String forwarded_address = session.getClientInfo().getLastForwardedFor(); + try + { + if (!forwarded_address.empty() && global_context->getConfigRef().getBool("auth_use_forwarded_address", false)) + session.authenticate(*current_credentials, Poco::Net::SocketAddress(forwarded_address, request.clientAddress().port())); + else + session.authenticate(*current_credentials, request.clientAddress()); + } + catch (const Authentication::Require & required_credentials) + { + current_credentials = std::make_unique(); + + if (required_credentials.getRealm().empty()) + response.set("WWW-Authenticate", "Basic"); + else + response.set("WWW-Authenticate", "Basic realm=\"" + required_credentials.getRealm() + "\""); + + response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); + response.send(); + /// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request. + request_credentials = std::move(current_credentials); + return false; + } + catch (const Authentication::Require & required_credentials) + { + current_credentials = global_context->makeGSSAcceptorContext(); + + if (required_credentials.getRealm().empty()) + response.set("WWW-Authenticate", "Negotiate"); + else + response.set("WWW-Authenticate", "Negotiate realm=\"" + required_credentials.getRealm() + "\""); + + response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); + response.send(); + /// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request. + request_credentials = std::move(current_credentials); + return false; + } + + return true; +} + +} diff --git a/src/Server/HTTP/authenticateUserByHTTP.h b/src/Server/HTTP/authenticateUserByHTTP.h new file mode 100644 index 00000000000..3b5a04cae68 --- /dev/null +++ b/src/Server/HTTP/authenticateUserByHTTP.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class HTTPServerRequest; +class HTMLForm; +class HTTPServerResponse; +class Session; +class Credentials; + +/// Authenticates a user via HTTP protocol and initializes a session. +/// Usually retrieves the name and the password for that user from either the request's headers or from the query parameters. +/// Returns true when the user successfully authenticated, +/// the session instance will be configured accordingly, and the request_credentials instance will be dropped. +/// Returns false when the user is not authenticated yet, and the HTTP_UNAUTHORIZED response is sent with the "WWW-Authenticate" header, +/// in this case the `request_credentials` instance must be preserved until the next request or until any exception. +/// Throws an exception if authentication failed. +bool authenticateUserByHTTP( + const HTTPServerRequest & request, + const HTMLForm & params, + HTTPServerResponse & response, + Session & session, + std::unique_ptr & request_credentials, + ContextPtr global_context, + LoggerPtr log); + +} diff --git a/src/Server/HTTP/exceptionCodeToHTTPStatus.cpp b/src/Server/HTTP/exceptionCodeToHTTPStatus.cpp new file mode 100644 index 00000000000..6de57217aac --- /dev/null +++ b/src/Server/HTTP/exceptionCodeToHTTPStatus.cpp @@ -0,0 +1,158 @@ +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int CANNOT_PARSE_TEXT; + extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; + extern const int CANNOT_PARSE_QUOTED_STRING; + extern const int CANNOT_PARSE_DATE; + extern const int CANNOT_PARSE_DATETIME; + extern const int CANNOT_PARSE_NUMBER; + extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; + extern const int CANNOT_PARSE_IPV4; + extern const int CANNOT_PARSE_IPV6; + extern const int CANNOT_PARSE_UUID; + extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; + extern const int CANNOT_SCHEDULE_TASK; + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_COMPILE_REGEXP; + extern const int DUPLICATE_COLUMN; + extern const int ILLEGAL_COLUMN; + extern const int THERE_IS_NO_COLUMN; + extern const int UNKNOWN_ELEMENT_IN_AST; + extern const int UNKNOWN_TYPE_OF_AST_NODE; + extern const int TOO_DEEP_AST; + extern const int TOO_BIG_AST; + extern const int UNEXPECTED_AST_STRUCTURE; + extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; + + extern const int SYNTAX_ERROR; + + extern const int INCORRECT_DATA; + extern const int TYPE_MISMATCH; + + extern const int UNKNOWN_TABLE; + extern const int UNKNOWN_FUNCTION; + extern const int UNKNOWN_IDENTIFIER; + extern const int UNKNOWN_TYPE; + extern const int UNKNOWN_STORAGE; + extern const int UNKNOWN_DATABASE; + extern const int UNKNOWN_SETTING; + extern const int UNKNOWN_DIRECTION_OF_SORTING; + extern const int UNKNOWN_AGGREGATE_FUNCTION; + extern const int UNKNOWN_FORMAT; + extern const int UNKNOWN_DATABASE_ENGINE; + extern const int UNKNOWN_TYPE_OF_QUERY; + extern const int UNKNOWN_ROLE; + + extern const int QUERY_IS_TOO_LARGE; + + extern const int NOT_IMPLEMENTED; + extern const int SOCKET_TIMEOUT; + + extern const int UNKNOWN_USER; + extern const int WRONG_PASSWORD; + extern const int REQUIRED_PASSWORD; + extern const int AUTHENTICATION_FAILED; + extern const int SET_NON_GRANTED_ROLE; + + extern const int HTTP_LENGTH_REQUIRED; + + extern const int TIMEOUT_EXCEEDED; +} + + +Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code) +{ + using namespace Poco::Net; + + if (exception_code == ErrorCodes::REQUIRED_PASSWORD) + { + return HTTPResponse::HTTP_UNAUTHORIZED; + } + else if (exception_code == ErrorCodes::UNKNOWN_USER || + exception_code == ErrorCodes::WRONG_PASSWORD || + exception_code == ErrorCodes::AUTHENTICATION_FAILED || + exception_code == ErrorCodes::SET_NON_GRANTED_ROLE) + { + return HTTPResponse::HTTP_FORBIDDEN; + } + else if (exception_code == ErrorCodes::BAD_ARGUMENTS || + exception_code == ErrorCodes::CANNOT_COMPILE_REGEXP || + exception_code == ErrorCodes::CANNOT_PARSE_TEXT || + exception_code == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE || + exception_code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING || + exception_code == ErrorCodes::CANNOT_PARSE_DATE || + exception_code == ErrorCodes::CANNOT_PARSE_DATETIME || + exception_code == ErrorCodes::CANNOT_PARSE_NUMBER || + exception_code == ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING || + exception_code == ErrorCodes::CANNOT_PARSE_IPV4 || + exception_code == ErrorCodes::CANNOT_PARSE_IPV6 || + exception_code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || + exception_code == ErrorCodes::CANNOT_PARSE_UUID || + exception_code == ErrorCodes::DUPLICATE_COLUMN || + exception_code == ErrorCodes::ILLEGAL_COLUMN || + exception_code == ErrorCodes::UNKNOWN_ELEMENT_IN_AST || + exception_code == ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE || + exception_code == ErrorCodes::THERE_IS_NO_COLUMN || + exception_code == ErrorCodes::TOO_DEEP_AST || + exception_code == ErrorCodes::TOO_BIG_AST || + exception_code == ErrorCodes::UNEXPECTED_AST_STRUCTURE || + exception_code == ErrorCodes::SYNTAX_ERROR || + exception_code == ErrorCodes::INCORRECT_DATA || + exception_code == ErrorCodes::TYPE_MISMATCH || + exception_code == ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE) + { + return HTTPResponse::HTTP_BAD_REQUEST; + } + else if (exception_code == ErrorCodes::UNKNOWN_TABLE || + exception_code == ErrorCodes::UNKNOWN_FUNCTION || + exception_code == ErrorCodes::UNKNOWN_IDENTIFIER || + exception_code == ErrorCodes::UNKNOWN_TYPE || + exception_code == ErrorCodes::UNKNOWN_STORAGE || + exception_code == ErrorCodes::UNKNOWN_DATABASE || + exception_code == ErrorCodes::UNKNOWN_SETTING || + exception_code == ErrorCodes::UNKNOWN_DIRECTION_OF_SORTING || + exception_code == ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION || + exception_code == ErrorCodes::UNKNOWN_FORMAT || + exception_code == ErrorCodes::UNKNOWN_DATABASE_ENGINE || + exception_code == ErrorCodes::UNKNOWN_TYPE_OF_QUERY || + exception_code == ErrorCodes::UNKNOWN_ROLE) + { + return HTTPResponse::HTTP_NOT_FOUND; + } + else if (exception_code == ErrorCodes::QUERY_IS_TOO_LARGE) + { + return HTTPResponse::HTTP_REQUESTENTITYTOOLARGE; + } + else if (exception_code == ErrorCodes::NOT_IMPLEMENTED) + { + return HTTPResponse::HTTP_NOT_IMPLEMENTED; + } + else if (exception_code == ErrorCodes::SOCKET_TIMEOUT || + exception_code == ErrorCodes::CANNOT_OPEN_FILE) + { + return HTTPResponse::HTTP_SERVICE_UNAVAILABLE; + } + else if (exception_code == ErrorCodes::HTTP_LENGTH_REQUIRED) + { + return HTTPResponse::HTTP_LENGTH_REQUIRED; + } + else if (exception_code == ErrorCodes::TIMEOUT_EXCEEDED) + { + return HTTPResponse::HTTP_REQUEST_TIMEOUT; + } + else if (exception_code == ErrorCodes::CANNOT_SCHEDULE_TASK) + { + return HTTPResponse::HTTP_SERVICE_UNAVAILABLE; + } + + return HTTPResponse::HTTP_INTERNAL_SERVER_ERROR; +} + +} diff --git a/src/Server/HTTP/exceptionCodeToHTTPStatus.h b/src/Server/HTTP/exceptionCodeToHTTPStatus.h new file mode 100644 index 00000000000..aadec7aac5a --- /dev/null +++ b/src/Server/HTTP/exceptionCodeToHTTPStatus.h @@ -0,0 +1,11 @@ +#pragma once +#include + + +namespace DB +{ + +/// Converts Exception code to HTTP status code. +Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code); + +} diff --git a/src/Server/HTTP/sendExceptionToHTTPClient.cpp b/src/Server/HTTP/sendExceptionToHTTPClient.cpp new file mode 100644 index 00000000000..022a763a9a2 --- /dev/null +++ b/src/Server/HTTP/sendExceptionToHTTPClient.cpp @@ -0,0 +1,81 @@ +#include + +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int HTTP_LENGTH_REQUIRED; + extern const int REQUIRED_PASSWORD; +} + + +void sendExceptionToHTTPClient( + const String & exception_message, + int exception_code, + HTTPServerRequest & request, + HTTPServerResponse & response, + WriteBufferFromHTTPServerResponse * out, + LoggerPtr log) +{ + setHTTPResponseStatusAndHeadersForException(exception_code, request, response, out, log); + + if (!out) + { + /// If nothing was sent yet. + WriteBufferFromHTTPServerResponse out_for_message{response, request.getMethod() == HTTPRequest::HTTP_HEAD, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT}; + + out_for_message.writeln(exception_message); + out_for_message.finalize(); + } + else + { + /// If buffer has data, and that data wasn't sent yet, then no need to send that data + bool data_sent = (out->count() != out->offset()); + + if (!data_sent) + out->position() = out->buffer().begin(); + + out->writeln(exception_message); + out->finalize(); + } +} + + +void setHTTPResponseStatusAndHeadersForException( + int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, WriteBufferFromHTTPServerResponse * out, LoggerPtr log) +{ + if (out) + out->setExceptionCode(exception_code); + else + response.set("X-ClickHouse-Exception-Code", toString(exception_code)); + + /// If HTTP method is POST and Keep-Alive is turned on, we should try to read the whole request body + /// to avoid reading part of the current request body in the next request. + if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && response.getKeepAlive() + && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED) + { + try + { + if (!request.getStream().eof()) + request.getStream().ignoreAll(); + } + catch (...) + { + tryLogCurrentException(log, "Cannot read remaining request body during exception handling"); + response.setKeepAlive(false); + } + } + + if (exception_code == ErrorCodes::REQUIRED_PASSWORD) + response.requireAuthentication("ClickHouse server HTTP API"); + else + response.setStatusAndReason(exceptionCodeToHTTPStatus(exception_code)); +} +} diff --git a/src/Server/HTTP/sendExceptionToHTTPClient.h b/src/Server/HTTP/sendExceptionToHTTPClient.h new file mode 100644 index 00000000000..31fda88d900 --- /dev/null +++ b/src/Server/HTTP/sendExceptionToHTTPClient.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class HTTPServerRequest; +class HTTPServerResponse; +class WriteBufferFromHTTPServerResponse; + +/// Sends an exception to HTTP client. This function doesn't handle its own exceptions so it needs to be wrapped in try-catch. +/// Argument `out` may be either created from `response` or be nullptr (if it wasn't created before the exception). +void sendExceptionToHTTPClient( + const String & exception_message, + int exception_code, + HTTPServerRequest & request, + HTTPServerResponse & response, + WriteBufferFromHTTPServerResponse * out, + LoggerPtr log); + +/// Sets "X-ClickHouse-Exception-Code" header and the correspondent HTTP status in the response for an exception. +/// This is a part of what sendExceptionToHTTPClient() does. +void setHTTPResponseStatusAndHeadersForException( + int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, WriteBufferFromHTTPServerResponse * out, LoggerPtr log); +} diff --git a/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.cpp b/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.cpp new file mode 100644 index 00000000000..48a3b39a3e7 --- /dev/null +++ b/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.cpp @@ -0,0 +1,25 @@ +#include + +#include +#include +#include + + +namespace DB +{ + +void setReadOnlyIfHTTPMethodIdempotent(ContextMutablePtr context, const String & http_method) +{ + /// Anything else beside HTTP POST should be readonly queries. + if (http_method != HTTPServerRequest::HTTP_POST) + { + /// 'readonly' setting values mean: + /// readonly = 0 - any query is allowed, client can change any setting. + /// readonly = 1 - only readonly queries are allowed, client can't change settings. + /// readonly = 2 - only readonly queries are allowed, client can change any setting except 'readonly'. + if (context->getSettingsRef().readonly == 0) + context->setSetting("readonly", 2); + } +} + +} diff --git a/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.h b/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.h new file mode 100644 index 00000000000..c46f2032d82 --- /dev/null +++ b/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.h @@ -0,0 +1,12 @@ +#pragma once + +#include + + +namespace DB +{ + +/// Sets readonly = 2 if the current HTTP method is not HTTP POST and if readonly is not set already. +void setReadOnlyIfHTTPMethodIdempotent(ContextMutablePtr context, const String & http_method); + +} diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index a00f6fb5412..c8a58527f2c 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -1,14 +1,11 @@ #include -#include #include -#include -#include -#include -#include #include #include #include +#include +#include #include #include #include @@ -37,20 +34,14 @@ #include #include #include +#include +#include +#include #include -#include -#include "config.h" - -#include -#include -#include #include -#include -#include -#include -#include -#include + +#include "config.h" #include #include @@ -60,78 +51,19 @@ #include #include -#if USE_SSL -#include -#endif - namespace DB { namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_PARSE_TEXT; - extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; - extern const int CANNOT_PARSE_QUOTED_STRING; - extern const int CANNOT_PARSE_DATE; - extern const int CANNOT_PARSE_DATETIME; - extern const int CANNOT_PARSE_NUMBER; - extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; - extern const int CANNOT_PARSE_IPV4; - extern const int CANNOT_PARSE_IPV6; - extern const int CANNOT_PARSE_UUID; - extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; - extern const int CANNOT_SCHEDULE_TASK; - extern const int DUPLICATE_COLUMN; - extern const int ILLEGAL_COLUMN; - extern const int THERE_IS_NO_COLUMN; - extern const int UNKNOWN_ELEMENT_IN_AST; - extern const int UNKNOWN_TYPE_OF_AST_NODE; - extern const int TOO_DEEP_AST; - extern const int TOO_BIG_AST; - extern const int UNEXPECTED_AST_STRUCTURE; - extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; - extern const int SYNTAX_ERROR; - - extern const int INCORRECT_DATA; - extern const int TYPE_MISMATCH; - - extern const int UNKNOWN_TABLE; - extern const int UNKNOWN_FUNCTION; - extern const int UNKNOWN_IDENTIFIER; - extern const int UNKNOWN_TYPE; - extern const int UNKNOWN_STORAGE; - extern const int UNKNOWN_DATABASE; - extern const int UNKNOWN_SETTING; - extern const int UNKNOWN_DIRECTION_OF_SORTING; - extern const int UNKNOWN_AGGREGATE_FUNCTION; - extern const int UNKNOWN_FORMAT; - extern const int UNKNOWN_DATABASE_ENGINE; - extern const int UNKNOWN_TYPE_OF_QUERY; - extern const int UNKNOWN_ROLE; extern const int NO_ELEMENTS_IN_CONFIG; - extern const int QUERY_IS_TOO_LARGE; - - extern const int NOT_IMPLEMENTED; - extern const int SOCKET_TIMEOUT; - - extern const int UNKNOWN_USER; - extern const int WRONG_PASSWORD; - extern const int REQUIRED_PASSWORD; - extern const int AUTHENTICATION_FAILED; - extern const int SET_NON_GRANTED_ROLE; - extern const int INVALID_SESSION_TIMEOUT; extern const int HTTP_LENGTH_REQUIRED; - extern const int SUPPORT_IS_DISABLED; - - extern const int TIMEOUT_EXCEEDED; } namespace @@ -173,115 +105,6 @@ void processOptionsRequest(HTTPServerResponse & response, const Poco::Util::Laye } } -static String base64Decode(const String & encoded) -{ - String decoded; - Poco::MemoryInputStream istr(encoded.data(), encoded.size()); - Poco::Base64Decoder decoder(istr); - Poco::StreamCopier::copyToString(decoder, decoded); - return decoded; -} - -static String base64Encode(const String & decoded) -{ - std::ostringstream ostr; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - ostr.exceptions(std::ios::failbit); - Poco::Base64Encoder encoder(ostr); - encoder.rdbuf()->setLineLength(0); - encoder << decoded; - encoder.close(); - return ostr.str(); -} - -static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code) -{ - using namespace Poco::Net; - - if (exception_code == ErrorCodes::REQUIRED_PASSWORD) - { - return HTTPResponse::HTTP_UNAUTHORIZED; - } - else if (exception_code == ErrorCodes::UNKNOWN_USER || - exception_code == ErrorCodes::WRONG_PASSWORD || - exception_code == ErrorCodes::AUTHENTICATION_FAILED || - exception_code == ErrorCodes::SET_NON_GRANTED_ROLE) - { - return HTTPResponse::HTTP_FORBIDDEN; - } - else if (exception_code == ErrorCodes::BAD_ARGUMENTS || - exception_code == ErrorCodes::CANNOT_COMPILE_REGEXP || - exception_code == ErrorCodes::CANNOT_PARSE_TEXT || - exception_code == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE || - exception_code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING || - exception_code == ErrorCodes::CANNOT_PARSE_DATE || - exception_code == ErrorCodes::CANNOT_PARSE_DATETIME || - exception_code == ErrorCodes::CANNOT_PARSE_NUMBER || - exception_code == ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING || - exception_code == ErrorCodes::CANNOT_PARSE_IPV4 || - exception_code == ErrorCodes::CANNOT_PARSE_IPV6 || - exception_code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || - exception_code == ErrorCodes::CANNOT_PARSE_UUID || - exception_code == ErrorCodes::DUPLICATE_COLUMN || - exception_code == ErrorCodes::ILLEGAL_COLUMN || - exception_code == ErrorCodes::UNKNOWN_ELEMENT_IN_AST || - exception_code == ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE || - exception_code == ErrorCodes::THERE_IS_NO_COLUMN || - exception_code == ErrorCodes::TOO_DEEP_AST || - exception_code == ErrorCodes::TOO_BIG_AST || - exception_code == ErrorCodes::UNEXPECTED_AST_STRUCTURE || - exception_code == ErrorCodes::SYNTAX_ERROR || - exception_code == ErrorCodes::INCORRECT_DATA || - exception_code == ErrorCodes::TYPE_MISMATCH || - exception_code == ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE) - { - return HTTPResponse::HTTP_BAD_REQUEST; - } - else if (exception_code == ErrorCodes::UNKNOWN_TABLE || - exception_code == ErrorCodes::UNKNOWN_FUNCTION || - exception_code == ErrorCodes::UNKNOWN_IDENTIFIER || - exception_code == ErrorCodes::UNKNOWN_TYPE || - exception_code == ErrorCodes::UNKNOWN_STORAGE || - exception_code == ErrorCodes::UNKNOWN_DATABASE || - exception_code == ErrorCodes::UNKNOWN_SETTING || - exception_code == ErrorCodes::UNKNOWN_DIRECTION_OF_SORTING || - exception_code == ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION || - exception_code == ErrorCodes::UNKNOWN_FORMAT || - exception_code == ErrorCodes::UNKNOWN_DATABASE_ENGINE || - exception_code == ErrorCodes::UNKNOWN_TYPE_OF_QUERY || - exception_code == ErrorCodes::UNKNOWN_ROLE) - { - return HTTPResponse::HTTP_NOT_FOUND; - } - else if (exception_code == ErrorCodes::QUERY_IS_TOO_LARGE) - { - return HTTPResponse::HTTP_REQUESTENTITYTOOLARGE; - } - else if (exception_code == ErrorCodes::NOT_IMPLEMENTED) - { - return HTTPResponse::HTTP_NOT_IMPLEMENTED; - } - else if (exception_code == ErrorCodes::SOCKET_TIMEOUT || - exception_code == ErrorCodes::CANNOT_OPEN_FILE) - { - return HTTPResponse::HTTP_SERVICE_UNAVAILABLE; - } - else if (exception_code == ErrorCodes::HTTP_LENGTH_REQUIRED) - { - return HTTPResponse::HTTP_LENGTH_REQUIRED; - } - else if (exception_code == ErrorCodes::TIMEOUT_EXCEEDED) - { - return HTTPResponse::HTTP_REQUEST_TIMEOUT; - } - else if (exception_code == ErrorCodes::CANNOT_SCHEDULE_TASK) - { - return HTTPResponse::HTTP_SERVICE_UNAVAILABLE; - } - - return HTTPResponse::HTTP_INTERNAL_SERVER_ERROR; -} - - static std::chrono::steady_clock::duration parseSessionTimeout( const Poco::Util::AbstractConfiguration & config, const HTMLForm & params) @@ -358,204 +181,9 @@ HTTPHandler::HTTPHandler(IServer & server_, const std::string & name, const HTTP HTTPHandler::~HTTPHandler() = default; -bool HTTPHandler::authenticateUser( - HTTPServerRequest & request, - HTMLForm & params, - HTTPServerResponse & response) +bool HTTPHandler::authenticateUser(HTTPServerRequest & request, HTMLForm & params, HTTPServerResponse & response) { - using namespace Poco::Net; - - /// The user and password can be passed by headers (similar to X-Auth-*), - /// which is used by load balancers to pass authentication information. - std::string user = request.get("X-ClickHouse-User", ""); - std::string password = request.get("X-ClickHouse-Key", ""); - std::string quota_key = request.get("X-ClickHouse-Quota", ""); - - /// The header 'X-ClickHouse-SSL-Certificate-Auth: on' enables checking the common name - /// extracted from the SSL certificate used for this connection instead of checking password. - bool has_ssl_certificate_auth = (request.get("X-ClickHouse-SSL-Certificate-Auth", "") == "on"); - bool has_auth_headers = !user.empty() || !password.empty() || has_ssl_certificate_auth; - - /// User name and password can be passed using HTTP Basic auth or query parameters - /// (both methods are insecure). - bool has_http_credentials = request.hasCredentials(); - bool has_credentials_in_query_params = params.has("user") || params.has("password"); - - std::string spnego_challenge; - SSLCertificateSubjects certificate_subjects; - - if (has_auth_headers) - { - /// It is prohibited to mix different authorization schemes. - if (has_http_credentials) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: it is not allowed " - "to use SSL certificate authentication and Authorization HTTP header simultaneously"); - if (has_credentials_in_query_params) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: it is not allowed " - "to use SSL certificate authentication and authentication via parameters simultaneously simultaneously"); - - if (has_ssl_certificate_auth) - { -#if USE_SSL - if (!password.empty()) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: it is not allowed " - "to use SSL certificate authentication and authentication via password simultaneously"); - - if (request.havePeerCertificate()) - certificate_subjects = extractSSLCertificateSubjects(request.peerCertificate()); - - if (certificate_subjects.empty()) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name or Subject Alternative Name"); -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, - "SSL certificate authentication disabled because ClickHouse was built without SSL library"); -#endif - } - } - else if (has_http_credentials) - { - /// It is prohibited to mix different authorization schemes. - if (has_credentials_in_query_params) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: it is not allowed " - "to use Authorization HTTP header and authentication via parameters simultaneously"); - - std::string scheme; - std::string auth_info; - request.getCredentials(scheme, auth_info); - - if (Poco::icompare(scheme, "Basic") == 0) - { - HTTPBasicCredentials credentials(auth_info); - user = credentials.getUsername(); - password = credentials.getPassword(); - } - else if (Poco::icompare(scheme, "Negotiate") == 0) - { - spnego_challenge = auth_info; - - if (spnego_challenge.empty()) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: SPNEGO challenge is empty"); - } - else - { - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme); - } - } - else - { - /// If the user name is not set we assume it's the 'default' user. - user = params.get("user", "default"); - password = params.get("password", ""); - } - - if (!certificate_subjects.empty()) - { - if (!request_credentials) - request_credentials = std::make_unique(user, std::move(certificate_subjects)); - - auto * certificate_credentials = dynamic_cast(request_credentials.get()); - if (!certificate_credentials) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected SSL certificate authorization scheme"); - } - else if (!spnego_challenge.empty()) - { - if (!request_credentials) - request_credentials = server.context()->makeGSSAcceptorContext(); - - auto * gss_acceptor_context = dynamic_cast(request_credentials.get()); - if (!gss_acceptor_context) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: unexpected 'Negotiate' HTTP Authorization scheme expected"); - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunreachable-code" - const auto spnego_response = base64Encode(gss_acceptor_context->processToken(base64Decode(spnego_challenge), log)); -#pragma clang diagnostic pop - - if (!spnego_response.empty()) - response.set("WWW-Authenticate", "Negotiate " + spnego_response); - - if (!gss_acceptor_context->isFailed() && !gss_acceptor_context->isReady()) - { - if (spnego_response.empty()) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: 'Negotiate' HTTP Authorization failure"); - - response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); - response.send(); - return false; - } - } - else // I.e., now using user name and password strings ("Basic"). - { - if (!request_credentials) - request_credentials = std::make_unique(); - - auto * basic_credentials = dynamic_cast(request_credentials.get()); - if (!basic_credentials) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected 'Basic' HTTP Authorization scheme"); - - basic_credentials->setUserName(user); - basic_credentials->setPassword(password); - } - - if (params.has("quota_key")) - { - if (!quota_key.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Invalid authentication: it is not allowed " - "to use quota key as HTTP header and as parameter simultaneously"); - - quota_key = params.get("quota_key"); - } - - /// Set client info. It will be used for quota accounting parameters in 'setUser' method. - - session->setHTTPClientInfo(request); - session->setQuotaClientKey(quota_key); - - /// Extract the last entry from comma separated list of forwarded_for addresses. - /// Only the last proxy can be trusted (if any). - String forwarded_address = session->getClientInfo().getLastForwardedFor(); - try - { - if (!forwarded_address.empty() && server.config().getBool("auth_use_forwarded_address", false)) - session->authenticate(*request_credentials, Poco::Net::SocketAddress(forwarded_address, request.clientAddress().port())); - else - session->authenticate(*request_credentials, request.clientAddress()); - } - catch (const Authentication::Require & required_credentials) - { - request_credentials = std::make_unique(); - - if (required_credentials.getRealm().empty()) - response.set("WWW-Authenticate", "Basic"); - else - response.set("WWW-Authenticate", "Basic realm=\"" + required_credentials.getRealm() + "\""); - - response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); - response.send(); - return false; - } - catch (const Authentication::Require & required_credentials) - { - request_credentials = server.context()->makeGSSAcceptorContext(); - - if (required_credentials.getRealm().empty()) - response.set("WWW-Authenticate", "Negotiate"); - else - response.set("WWW-Authenticate", "Negotiate realm=\"" + required_credentials.getRealm() + "\""); - - response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); - response.send(); - return false; - } - - request_credentials.reset(); - return true; + return authenticateUserByHTTP(request, params, response, *session, request_credentials, server.context(), log); } @@ -727,10 +355,22 @@ void HTTPHandler::processQuery( std::unique_ptr in; - static const NameSet reserved_param_names{"compress", "decompress", "user", "password", "quota_key", "query_id", "stacktrace", "role", - "buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check", "client_protocol_version", "close_session"}; + auto roles = params.getAll("role"); + if (!roles.empty()) + context->setCurrentRoles(roles); - Names reserved_param_suffixes; + std::string database = request.get("X-ClickHouse-Database", params.get("database", "")); + if (!database.empty()) + context->setCurrentDatabase(database); + + std::string default_format = request.get("X-ClickHouse-Format", params.get("default_format", "")); + if (!default_format.empty()) + context->setDefaultFormat(default_format); + + /// Anything else beside HTTP POST should be readonly queries. + setReadOnlyIfHTTPMethodIdempotent(context, request.getMethod()); + + bool has_external_data = startsWith(request.getContentType(), "multipart/form-data"); auto param_could_be_skipped = [&] (const String & name) { @@ -738,87 +378,36 @@ void HTTPHandler::processQuery( if (name.empty()) return true; + /// Some parameters (database, default_format, everything used in the code above) do not + /// belong to the Settings class. + static const NameSet reserved_param_names{"compress", "decompress", "user", "password", "quota_key", "query_id", "stacktrace", "role", + "buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check", "client_protocol_version", "close_session", + "database", "default_format"}; + if (reserved_param_names.contains(name)) return true; - for (const String & suffix : reserved_param_suffixes) + /// For external data we also want settings. + if (has_external_data) { - if (endsWith(name, suffix)) - return true; + /// Skip unneeded parameters to avoid confusing them later with context settings or query parameters. + /// It is a bug and ambiguity with `date_time_input_format` and `low_cardinality_allow_in_native_format` formats/settings. + static const Names reserved_param_suffixes = {"_format", "_types", "_structure"}; + for (const String & suffix : reserved_param_suffixes) + { + if (endsWith(name, suffix)) + return true; + } } return false; }; - auto roles = params.getAll("role"); - if (!roles.empty()) - { - const auto & access_control = context->getAccessControl(); - const auto & user = context->getUser(); - std::vector roles_ids(roles.size()); - for (size_t i = 0; i < roles.size(); i++) - { - auto role_id = access_control.getID(roles[i]); - if (user->granted_roles.isGranted(role_id)) - roles_ids[i] = role_id; - else - throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role {} should be granted to set as a current", roles[i].get()); - } - context->setCurrentRoles(roles_ids); - } - /// Settings can be overridden in the query. - /// Some parameters (database, default_format, everything used in the code above) do not - /// belong to the Settings class. - - /// 'readonly' setting values mean: - /// readonly = 0 - any query is allowed, client can change any setting. - /// readonly = 1 - only readonly queries are allowed, client can't change settings. - /// readonly = 2 - only readonly queries are allowed, client can change any setting except 'readonly'. - - /// In theory if initially readonly = 0, the client can change any setting and then set readonly - /// to some other value. - const auto & settings = context->getSettingsRef(); - - /// Anything else beside HTTP POST should be readonly queries. - if (request.getMethod() != HTTPServerRequest::HTTP_POST) - { - if (settings.readonly == 0) - context->setSetting("readonly", 2); - } - - bool has_external_data = startsWith(request.getContentType(), "multipart/form-data"); - - if (has_external_data) - { - /// Skip unneeded parameters to avoid confusing them later with context settings or query parameters. - reserved_param_suffixes.reserve(3); - /// It is a bug and ambiguity with `date_time_input_format` and `low_cardinality_allow_in_native_format` formats/settings. - reserved_param_suffixes.emplace_back("_format"); - reserved_param_suffixes.emplace_back("_types"); - reserved_param_suffixes.emplace_back("_structure"); - } - - std::string database = request.get("X-ClickHouse-Database", ""); - std::string default_format = request.get("X-ClickHouse-Format", ""); - SettingsChanges settings_changes; for (const auto & [key, value] : params) { - if (key == "database") - { - if (database.empty()) - database = value; - } - else if (key == "default_format") - { - if (default_format.empty()) - default_format = value; - } - else if (param_could_be_skipped(key)) - { - } - else + if (!param_could_be_skipped(key)) { /// Other than query parameters are treated as settings. if (!customizeQueryParam(context, key, value)) @@ -826,15 +415,9 @@ void HTTPHandler::processQuery( } } - if (!database.empty()) - context->setCurrentDatabase(database); - - if (!default_format.empty()) - context->setDefaultFormat(default_format); - - /// For external data we also want settings context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); context->applySettingsChanges(settings_changes); + const auto & settings = context->getSettingsRef(); /// Set the query id supplied by the user, if any, and also update the OpenTelemetry fields. context->setCurrentQueryId(params.get("query_id", request.get("X-ClickHouse-Query-Id", ""))); @@ -936,7 +519,7 @@ void HTTPHandler::processQuery( { bool with_stacktrace = (params.getParsed("stacktrace", false) && server.config().getBool("enable_http_stacktrace", true)); ExecutionStatus status = ExecutionStatus::fromCurrentException("", with_stacktrace); - formatExceptionForClient(status.code, request, response, used_output); + setHTTPResponseStatusAndHeadersForException(status.code, request, response, used_output.out_holder.get(), log); current_output_format.setException(status.message); current_output_format.finalize(); used_output.exception_is_written = true; @@ -970,7 +553,7 @@ void HTTPHandler::trySendExceptionToClient( const std::string & s, int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) try { - formatExceptionForClient(exception_code, request, response, used_output); + setHTTPResponseStatusAndHeadersForException(exception_code, request, response, used_output.out_holder.get(), log); if (!used_output.out_holder && !used_output.exception_is_written) { @@ -1032,38 +615,6 @@ catch (...) used_output.cancel(); } -void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) -{ - if (used_output.out_holder) - used_output.out_holder->setExceptionCode(exception_code); - else - response.set("X-ClickHouse-Exception-Code", toString(exception_code)); - - /// FIXME: make sure that no one else is reading from the same stream at the moment. - - /// If HTTP method is POST and Keep-Alive is turned on, we should try to read the whole request body - /// to avoid reading part of the current request body in the next request. - if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && response.getKeepAlive() - && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED) - { - try - { - if (!request.getStream().eof()) - request.getStream().ignoreAll(); - } - catch (...) - { - tryLogCurrentException(log, "Cannot read remaining request body during exception handling"); - response.setKeepAlive(false); - } - } - - if (exception_code == ErrorCodes::REQUIRED_PASSWORD) - response.requireAuthentication("ClickHouse server HTTP API"); - else - response.setStatusAndReason(exceptionCodeToHTTPStatus(exception_code)); -} - void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) { setThreadName("HTTPHandler"); diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index c78c45826f0..6580b317f6e 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -173,12 +173,6 @@ private: HTTPServerResponse & response, Output & used_output); - void formatExceptionForClient( - int exception_code, - HTTPServerRequest & request, - HTTPServerResponse & response, - Output & used_output); - static void pushDelayedResults(Output & used_output); }; diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 45c28babe3a..e46021c8e68 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp index c973be040c8..1972c3263b2 100644 --- a/src/Server/KeeperReadinessHandler.cpp +++ b/src/Server/KeeperReadinessHandler.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 4612e2e9fa8..4849f5827c1 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -2,33 +2,37 @@ #if USE_NURAFT -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include -#ifdef POCO_HAVE_FD_EPOLL - #include -#else - #include -#endif +# ifdef POCO_HAVE_FD_EPOLL +# include +# else +# include +# endif + +namespace ProfileEvents +{ + extern const Event KeeperTotalElapsedMicroseconds; +} namespace DB @@ -397,13 +401,11 @@ void KeeperTCPHandler::runImpl() } auto response_fd = poll_wrapper->getResponseFD(); - auto response_callback = [responses_ = this->responses, response_fd](const Coordination::ZooKeeperResponsePtr & response) + auto response_callback = [my_responses = this->responses, + response_fd](const Coordination::ZooKeeperResponsePtr & response, Coordination::ZooKeeperRequestPtr request) { - if (!responses_->push(response)) - throw Exception(ErrorCodes::SYSTEM_ERROR, - "Could not push response with xid {} and zxid {}", - response->xid, - response->zxid); + if (!my_responses->push(RequestWithResponse{response, std::move(request)})) + throw Exception(ErrorCodes::SYSTEM_ERROR, "Could not push response with xid {} and zxid {}", response->xid, response->zxid); UInt8 single_byte = 1; [[maybe_unused]] ssize_t result = write(response_fd, &single_byte, sizeof(single_byte)); @@ -411,12 +413,12 @@ void KeeperTCPHandler::runImpl() keeper_dispatcher->registerSession(session_id, response_callback); Stopwatch logging_stopwatch; + auto operation_max_ms = keeper_dispatcher->getKeeperContext()->getCoordinationSettings()->log_slow_connection_operation_threshold_ms; auto log_long_operation = [&](const String & operation) { - constexpr UInt64 operation_max_ms = 500; auto elapsed_ms = logging_stopwatch.elapsedMilliseconds(); if (operation_max_ms < elapsed_ms) - LOG_TEST(log, "{} for session {} took {} ms", operation, session_id, elapsed_ms); + LOG_INFO(log, "{} for session {} took {} ms", operation, session_id, elapsed_ms); logging_stopwatch.restart(); }; @@ -467,19 +469,20 @@ void KeeperTCPHandler::runImpl() /// became inconsistent and race condition is possible. while (result.responses_count != 0) { - Coordination::ZooKeeperResponsePtr response; + RequestWithResponse request_with_response; - if (!responses->tryPop(response)) + if (!responses->tryPop(request_with_response)) throw Exception(ErrorCodes::LOGICAL_ERROR, "We must have ready response, but queue is empty. It's a bug."); log_long_operation("Waiting for response to be ready"); + auto & response = request_with_response.response; if (response->xid == close_xid) { LOG_DEBUG(log, "Session #{} successfully closed", session_id); return; } - updateStats(response); + updateStats(response, request_with_response.request); packageSent(); response->write(getWriteBuffer()); @@ -606,16 +609,28 @@ void KeeperTCPHandler::packageReceived() keeper_dispatcher->incrementPacketsReceived(); } -void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response) +void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response, const Coordination::ZooKeeperRequestPtr & request) { /// update statistics ignoring watch response and heartbeat. if (response->xid != Coordination::WATCH_XID && response->getOpNum() != Coordination::OpNum::Heartbeat) { - Int64 elapsed = (Poco::Timestamp() - operations[response->xid]) / 1000; - conn_stats.updateLatency(elapsed); + Int64 elapsed = (Poco::Timestamp() - operations[response->xid]); + ProfileEvents::increment(ProfileEvents::KeeperTotalElapsedMicroseconds, elapsed); + Int64 elapsed_ms = elapsed / 1000; + + if (request && elapsed_ms > static_cast(keeper_dispatcher->getKeeperContext()->getCoordinationSettings()->log_slow_total_threshold_ms)) + { + LOG_INFO( + log, + "Total time to process a request took too long ({}ms).\nRequest info: {}", + elapsed, + request->toString(/*short_format=*/true)); + } + + conn_stats.updateLatency(elapsed_ms); operations.erase(response->xid); - keeper_dispatcher->updateKeeperStatLatency(elapsed); + keeper_dispatcher->updateKeeperStatLatency(elapsed_ms); last_op.set(std::make_unique(LastOp{ .name = Coordination::toString(response->getOpNum()), diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index c1c522eee89..7c2b8acf624 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -26,7 +26,13 @@ namespace DB struct SocketInterruptablePollWrapper; using SocketInterruptablePollWrapperPtr = std::unique_ptr; -using ThreadSafeResponseQueue = ConcurrentBoundedQueue; +struct RequestWithResponse +{ + Coordination::ZooKeeperResponsePtr response; + Coordination::ZooKeeperRequestPtr request; /// it can be nullptr for some responses +}; + +using ThreadSafeResponseQueue = ConcurrentBoundedQueue; using ThreadSafeResponseQueuePtr = std::shared_ptr; struct LastOp; @@ -104,7 +110,7 @@ private: void packageSent(); void packageReceived(); - void updateStats(Coordination::ZooKeeperResponsePtr & response); + void updateStats(Coordination::ZooKeeperResponsePtr & response, const Coordination::ZooKeeperRequestPtr & request); Poco::Timestamp established; diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 12438c7a05c..1858d6d025d 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index 2f891ebf810..f632ef51c00 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -8,6 +8,7 @@ #include #include #include +#include #include "IO/ReadBufferFromString.h" #include "IServer.h" diff --git a/src/Server/NotFoundHandler.cpp b/src/Server/NotFoundHandler.cpp index 38f56921c89..304021ee8dc 100644 --- a/src/Server/NotFoundHandler.cpp +++ b/src/Server/NotFoundHandler.cpp @@ -1,6 +1,7 @@ #include #include +#include #include namespace DB diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index 8ba8421e6f0..cde31b4c58a 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #if USE_SSL # include diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index dff960f7031..1f3e038a1f5 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -18,9 +18,6 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe { try { - /// Raw config reference is used here to avoid dependency on Context and ServerSettings. - /// This is painful, because this class is also used in a build with CLICKHOUSE_KEEPER_STANDALONE_BUILD=1 - /// And there ordinary Context is replaced with a tiny clone. const auto & config = server.config(); unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); diff --git a/src/Server/ProtocolServerAdapter.cpp b/src/Server/ProtocolServerAdapter.cpp index 8d14a849894..b41ad2376f1 100644 --- a/src/Server/ProtocolServerAdapter.cpp +++ b/src/Server/ProtocolServerAdapter.cpp @@ -1,7 +1,7 @@ #include #include -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC #include #endif @@ -37,7 +37,7 @@ ProtocolServerAdapter::ProtocolServerAdapter( { } -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC class ProtocolServerAdapter::GRPCServerAdapterImpl : public Impl { public: diff --git a/src/Server/ProtocolServerAdapter.h b/src/Server/ProtocolServerAdapter.h index dd11c1dfc58..76a6776ed9c 100644 --- a/src/Server/ProtocolServerAdapter.h +++ b/src/Server/ProtocolServerAdapter.h @@ -23,7 +23,7 @@ public: ProtocolServerAdapter & operator =(ProtocolServerAdapter && src) = default; ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr tcp_server_); -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr grpc_server_); #endif diff --git a/src/Server/ProxyV1Handler.cpp b/src/Server/ProxyV1Handler.cpp index d5e6ab23360..5b33a119cef 100644 --- a/src/Server/ProxyV1Handler.cpp +++ b/src/Server/ProxyV1Handler.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index 67823117758..f43357db0a8 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -7,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp index 331b7a84857..f3981dea9fb 100644 --- a/src/Server/StaticRequestHandler.cpp +++ b/src/Server/StaticRequestHandler.cpp @@ -4,13 +4,14 @@ #include "HTTPHandlerFactory.h" #include "HTTPResponseHeaderWriter.h" +#include #include #include #include -#include #include -#include +#include #include +#include #include diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 3a7ad08537a..27fe4948657 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1885,7 +1885,7 @@ void TCPHandler::receiveQuery() #endif } - query_context = session->makeQueryContext(std::move(client_info)); + query_context = session->makeQueryContext(client_info); /// Sets the default database if it wasn't set earlier for the session context. if (is_interserver_mode && !default_database.empty()) @@ -1900,6 +1900,16 @@ void TCPHandler::receiveQuery() /// /// Settings /// + + /// FIXME: Remove when allow_experimental_analyzer will become obsolete. + /// Analyzer became Beta in 24.3 and started to be enabled by default. + /// We have to disable it for ourselves to make sure we don't have different settings on + /// different servers. + if (query_kind == ClientInfo::QueryKind::SECONDARY_QUERY + && client_info.getVersionNumber() < VersionNumber(23, 3, 0) + && !passed_settings.allow_experimental_analyzer.changed) + passed_settings.set("allow_experimental_analyzer", false); + auto settings_changes = passed_settings.changes(); query_kind = query_context->getClientInfo().query_kind; if (query_kind == ClientInfo::QueryKind::INITIAL_QUERY) @@ -2107,6 +2117,7 @@ void TCPHandler::initBlockOutput(const Block & block) *state.maybe_compressed_out, client_tcp_protocol_version, block.cloneEmpty(), + std::nullopt, !query_settings.low_cardinality_allow_in_native_format); } } @@ -2121,6 +2132,7 @@ void TCPHandler::initLogsBlockOutput(const Block & block) *out, client_tcp_protocol_version, block.cloneEmpty(), + std::nullopt, !query_settings.low_cardinality_allow_in_native_format); } } @@ -2135,6 +2147,7 @@ void TCPHandler::initProfileEventsBlockOutput(const Block & block) *out, client_tcp_protocol_version, block.cloneEmpty(), + std::nullopt, !query_settings.low_cardinality_allow_in_native_format); } } diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index 68d3ff0b325..a3d098014e7 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -5,8 +5,9 @@ #include #include -#include +#include #include +#include #include #include diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 35a5e95e643..7891042bb96 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -33,9 +34,9 @@ #include #include #include +#include #include #include -#include #include diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp index 06d4c185840..e1facec5b40 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -196,6 +196,16 @@ void DistributedAsyncInsertBatch::readText(ReadBuffer & in) UInt64 idx; in >> idx >> "\n"; files.push_back(std::filesystem::absolute(fmt::format("{}/{}.bin", parent.path, idx)).string()); + + ReadBufferFromFile header_buffer(files.back()); + const DistributedAsyncInsertHeader & header = DistributedAsyncInsertHeader::read(header_buffer, parent.log); + total_bytes += total_bytes; + + if (header.rows) + { + total_rows += header.rows; + total_bytes += header.bytes; + } } recovered = true; diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index e556bda2561..a2bd7237854 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include diff --git a/src/Storages/FileLog/FileLogSettings.cpp b/src/Storages/FileLog/FileLogSettings.cpp index 8e245285b9a..cf2d6b64d7c 100644 --- a/src/Storages/FileLog/FileLogSettings.cpp +++ b/src/Storages/FileLog/FileLogSettings.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 28d8128e052..9063437c065 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -321,7 +321,7 @@ public: Chunk generateChunkFromMetadata() { - size_t num_rows = std::min(current_file_remained_rows, UInt64(getContext()->getSettings().max_block_size)); + size_t num_rows = std::min(current_file_remained_rows, UInt64(getContext()->getSettingsRef().max_block_size)); current_file_remained_rows -= num_rows; Block source_block; @@ -964,7 +964,7 @@ HiveFiles StorageHive::collectHiveFiles( /// Hive files to collect HiveFiles hive_files; Int64 hit_parttions_num = 0; - Int64 hive_max_query_partitions = context_->getSettings().max_partitions_to_read; + Int64 hive_max_query_partitions = context_->getSettingsRef().max_partitions_to_read; /// Mutext to protect hive_files, which maybe appended in multiple threads std::mutex hive_files_mutex; ThreadPool pool{CurrentMetrics::StorageHiveThreads, CurrentMetrics::StorageHiveThreadsActive, CurrentMetrics::StorageHiveThreadsScheduled, max_threads}; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 1f7ac23eb82..3b9c6ff28fa 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 98afd844046..6217470780d 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -165,6 +165,8 @@ public: /// Returns true if the storage supports reading of subcolumns of complex types. virtual bool supportsSubcolumns() const { return false; } + /// Returns true if storage supports optimizations of functions by reading subcolumns. + virtual bool supportsOptimizationToSubcolumns() const { return supportsSubcolumns(); } /// Returns true if the storage supports transactions for SELECT, INSERT and ALTER queries. /// Storage may throw an exception later if some query kind is not fully supported. diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp index 9c5b29ae265..63467603d16 100644 --- a/src/Storages/IStorageCluster.cpp +++ b/src/Storages/IStorageCluster.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h index f3283247672..893cf222556 100644 --- a/src/Storages/IStorageCluster.h +++ b/src/Storages/IStorageCluster.h @@ -37,7 +37,10 @@ public: QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; - bool isRemote() const override { return true; } + bool isRemote() const final { return true; } + bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } protected: virtual void updateBeforeRead(const ContextPtr &) {} diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 57a1ea302f9..f1323d45c26 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -27,6 +27,7 @@ limitations under the License. */ #include #include #include +#include #include #include diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index bc8cb0ce69a..29631b95542 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp b/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp index 69e54dd5f0c..7354243732c 100644 --- a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp +++ b/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -54,7 +55,7 @@ ApproximateNearestNeighborInformation::Metric stringToMetric(std::string_view me ApproximateNearestNeighborCondition::ApproximateNearestNeighborCondition(const SelectQueryInfo & query_info, ContextPtr context) : block_with_constants(KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context)) , index_granularity(context->getMergeTreeSettings().index_granularity) - , max_limit_for_ann_queries(context->getSettings().max_limit_for_ann_queries) + , max_limit_for_ann_queries(context->getSettingsRef().max_limit_for_ann_queries) , index_is_useful(checkQueryStructure(query_info)) {} diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp index 9d64592ed64..6606b00e287 100644 --- a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp +++ b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp @@ -1,7 +1,8 @@ +#include +#include +#include #include #include -#include -#include #include diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index b7fdcc63e1b..8e73021d3e7 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index e92a608eed0..70044db7336 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -375,6 +376,12 @@ void IMergeTreeDataPart::unloadIndex() index_loaded = false; } +bool IMergeTreeDataPart::isIndexLoaded() const +{ + std::scoped_lock lock(index_mutex); + return index_loaded; +} + void IMergeTreeDataPart::setName(const String & new_name) { mutable_name = new_name; @@ -1307,6 +1314,17 @@ void IMergeTreeDataPart::loadRowsCount() auto buf = metadata_manager->read("count.txt"); readIntText(rows_count, *buf); assertEOF(*buf); + + if (!index_granularity.empty() && rows_count < index_granularity.getTotalRows() && index_granularity_info.fixed_index_granularity) + { + /// Adjust last granule size to match the number of rows in the part in case of fixed index_granularity. + index_granularity.popMark(); + index_granularity.appendMark(rows_count % index_granularity_info.fixed_index_granularity); + if (rows_count != index_granularity.getTotalRows()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Index granularity total rows in part {} does not match rows_count: {}, instead of {}", + name, index_granularity.getTotalRows(), rows_count); + } }; if (index_granularity.empty()) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index c9b3ec26df0..571f1389e10 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -369,6 +369,7 @@ public: void setIndex(const Columns & cols_); void setIndex(Columns && cols_); void unloadIndex(); + bool isIndexLoaded() const; /// For data in RAM ('index') UInt64 getIndexSizeInBytes() const; diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 6152da78395..c87f66b64f3 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -71,9 +72,21 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( Columns IMergeTreeDataPartWriter::releaseIndexColumns() { - return Columns( - std::make_move_iterator(index_columns.begin()), - std::make_move_iterator(index_columns.end())); + /// The memory for index was allocated without thread memory tracker. + /// We need to deallocate it in shrinkToFit without memory tracker as well. + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + + Columns result; + result.reserve(index_columns.size()); + + for (auto & column : index_columns) + { + column->shrinkToFit(); + result.push_back(std::move(column)); + } + + index_columns.clear(); + return result; } SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index d9e9a433827..adaa4eddb98 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -1,11 +1,10 @@ #pragma once +#include #include -#include #include #include #include -#include #include #include @@ -13,6 +12,9 @@ namespace DB { +struct MergeTreeSettings; +using MergeTreeSettingsPtr = std::shared_ptr; + Block getBlockAndPermute(const Block & block, const Names & names, const IColumn::Permutation * permutation); Block permuteBlockIfNeeded(const Block & block, const IColumn::Permutation * permutation); diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index 89c813ab233..70e838e666a 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index a9b058418ea..cfcfb177e05 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -1,16 +1,18 @@ #pragma once #include -#include -#include -#include #include #include +#include +#include #include namespace DB { +struct MergeTreeSettings; +using MergeTreeSettingsPtr = std::shared_ptr; + class IMergedBlockOutputStream { public: diff --git a/src/Storages/MergeTree/IPartMetadataManager.h b/src/Storages/MergeTree/IPartMetadataManager.h index cef1d10e4ad..e817421f7d0 100644 --- a/src/Storages/MergeTree/IPartMetadataManager.h +++ b/src/Storages/MergeTree/IPartMetadataManager.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 7e4b1db4c89..40eef8e7431 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -874,46 +875,6 @@ static Field applyFunctionForField( return (*col)[0]; } -/// The case when arguments may have types different than in the primary key. -static std::pair applyFunctionForFieldOfUnknownType( - const FunctionBasePtr & func, - const DataTypePtr & arg_type, - const Field & arg_value) -{ - ColumnsWithTypeAndName arguments{{ arg_type->createColumnConst(1, arg_value), arg_type, "x" }}; - DataTypePtr return_type = func->getResultType(); - - auto col = func->execute(arguments, return_type, 1); - - Field result = (*col)[0]; - - return {std::move(result), std::move(return_type)}; -} - - -/// Same as above but for binary operators -static std::pair applyBinaryFunctionForFieldOfUnknownType( - const FunctionOverloadResolverPtr & func, - const DataTypePtr & arg_type, - const Field & arg_value, - const DataTypePtr & arg_type2, - const Field & arg_value2) -{ - ColumnsWithTypeAndName arguments{ - {arg_type->createColumnConst(1, arg_value), arg_type, "x"}, {arg_type2->createColumnConst(1, arg_value2), arg_type2, "y"}}; - - FunctionBasePtr func_base = func->build(arguments); - - DataTypePtr return_type = func_base->getResultType(); - - auto col = func_base->execute(arguments, return_type, 1); - - Field result = (*col)[0]; - - return {std::move(result), std::move(return_type)}; -} - - static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field) { /// Fallback for fields without block reference. @@ -940,164 +901,92 @@ static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & return {field.columns, field.row_idx, result_idx}; } -/** When table's key has expression with these functions from a column, - * and when a column in a query is compared with a constant, such as: - * CREATE TABLE (x String) ORDER BY toDate(x) - * SELECT ... WHERE x LIKE 'Hello%' - * we want to apply the function to the constant for index analysis, - * but should modify it to pass on un-parsable values. - */ -static std::set date_time_parsing_functions = { - "toDate", - "toDate32", - "toDateTime", - "toDateTime64", - "parseDateTimeBestEffort", - "parseDateTimeBestEffortUS", - "parseDateTime32BestEffort", - "parseDateTime64BestEffort", - "parseDateTime", - "parseDateTimeInJodaSyntax", -}; - -/** The key functional expression constraint may be inferred from a plain column in the expression. - * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, - * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())` - * condition also holds, so the index may be used to select only parts satisfying this condition. - * - * To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the - * inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`). - * Instead, we can qualify only functions that do not transform the range (for example rounding), - * which while not strictly monotonic, are monotonic everywhere on the input range. - */ -bool KeyCondition::transformConstantWithValidFunctions( - ContextPtr context, - const String & expr_name, - size_t & out_key_column_num, - DataTypePtr & out_key_column_type, - Field & out_value, - DataTypePtr & out_type, - std::function always_monotonic) const +/// Sequentially applies functions to the column, returns `true` +/// if all function arguments are compatible with functions +/// signatures, and none of the functions produce `NULL` output. +/// +/// After functions chain execution, fills result column and its type. +bool applyFunctionChainToColumn( + const ColumnPtr & in_column, + const DataTypePtr & in_data_type, + const std::vector & functions, + ColumnPtr & out_column, + DataTypePtr & out_data_type) { - const auto & sample_block = key_expr->getSampleBlock(); + // Remove LowCardinality from input column, and convert it to regular one + auto result_column = in_column->convertToFullIfNeeded(); + auto result_type = removeLowCardinality(in_data_type); - for (const auto & node : key_expr->getNodes()) + // In case function sequence is empty, return full non-LowCardinality column + if (functions.empty()) { - auto it = key_columns.find(node.result_name); - if (it != key_columns.end()) - { - std::stack chain; - - const auto * cur_node = &node; - bool is_valid_chain = true; - - while (is_valid_chain) - { - if (cur_node->result_name == expr_name) - break; - - chain.push(cur_node); - - if (cur_node->type == ActionsDAG::ActionType::FUNCTION && cur_node->children.size() <= 2) - { - is_valid_chain = always_monotonic(*cur_node->function_base, *cur_node->result_type); - - const ActionsDAG::Node * next_node = nullptr; - for (const auto * arg : cur_node->children) - { - if (arg->column && isColumnConst(*arg->column)) - continue; - - if (next_node) - is_valid_chain = false; - - next_node = arg; - } - - if (!next_node) - is_valid_chain = false; - - cur_node = next_node; - } - else if (cur_node->type == ActionsDAG::ActionType::ALIAS) - cur_node = cur_node->children.front(); - else - is_valid_chain = false; - } - - if (is_valid_chain) - { - out_type = removeLowCardinality(out_type); - auto const_type = removeLowCardinality(cur_node->result_type); - auto const_column = out_type->createColumnConst(1, out_value); - auto const_value = (*castColumnAccurateOrNull({const_column, out_type, ""}, const_type))[0]; - - if (const_value.isNull()) - return false; - - while (!chain.empty()) - { - const auto * func = chain.top(); - chain.pop(); - - if (func->type != ActionsDAG::ActionType::FUNCTION) - continue; - - const auto & func_name = func->function_base->getName(); - auto func_base = func->function_base; - const auto & arg_types = func_base->getArgumentTypes(); - if (date_time_parsing_functions.contains(func_name) && !arg_types.empty() && isStringOrFixedString(arg_types[0])) - { - auto func_or_null = FunctionFactory::instance().get(func_name + "OrNull", context); - ColumnsWithTypeAndName arguments; - int i = 0; - for (const auto & type : func->function_base->getArgumentTypes()) - arguments.push_back({nullptr, type, fmt::format("_{}", i++)}); - - func_base = func_or_null->build(arguments); - } - - if (func->children.size() == 1) - { - std::tie(const_value, const_type) - = applyFunctionForFieldOfUnknownType(func_base, const_type, const_value); - } - else if (func->children.size() == 2) - { - const auto * left = func->children[0]; - const auto * right = func->children[1]; - if (left->column && isColumnConst(*left->column)) - { - auto left_arg_type = left->result_type; - auto left_arg_value = (*left->column)[0]; - std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( - FunctionFactory::instance().get(func_base->getName(), context), - left_arg_type, left_arg_value, const_type, const_value); - } - else - { - auto right_arg_type = right->result_type; - auto right_arg_value = (*right->column)[0]; - std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( - FunctionFactory::instance().get(func_base->getName(), context), - const_type, const_value, right_arg_type, right_arg_value); - } - } - - if (const_value.isNull()) - return false; - } - - out_key_column_num = it->second; - out_key_column_type = sample_block.getByName(it->first).type; - out_value = const_value; - out_type = const_type; - return true; - } - } + out_column = result_column; + out_data_type = result_type; + return true; } - return false; + // If first function arguments are empty, cannot transform input column + if (functions[0]->getArgumentTypes().empty()) + { + return false; + } + + // And cast it to the argument type of the first function in the chain + auto in_argument_type = functions[0]->getArgumentTypes()[0]; + if (canBeSafelyCasted(result_type, in_argument_type)) + { + result_column = castColumnAccurate({result_column, result_type, ""}, in_argument_type); + result_type = in_argument_type; + } + // If column cannot be casted accurate, casting with OrNull, and in case all + // values has been casted (no nulls), unpacking nested column from nullable. + // In case any further functions require Nullable input, they'll be able + // to cast it. + else + { + result_column = castColumnAccurateOrNull({result_column, result_type, ""}, in_argument_type); + const auto & result_column_nullable = assert_cast(*result_column); + const auto & null_map_data = result_column_nullable.getNullMapData(); + for (char8_t i : null_map_data) + { + if (i != 0) + return false; + } + result_column = result_column_nullable.getNestedColumnPtr(); + result_type = removeNullable(in_argument_type); + } + + for (const auto & func : functions) + { + if (func->getArgumentTypes().empty()) + return false; + + auto argument_type = func->getArgumentTypes()[0]; + if (!canBeSafelyCasted(result_type, argument_type)) + return false; + + result_column = castColumnAccurate({result_column, result_type, ""}, argument_type); + result_column = func->execute({{result_column, argument_type, ""}}, func->getResultType(), result_column->size()); + result_type = func->getResultType(); + + // Transforming nullable columns to the nested ones, in case no nulls found + if (result_column->isNullable()) + { + const auto & result_column_nullable = assert_cast(*result_column); + const auto & null_map_data = result_column_nullable.getNullMapData(); + for (char8_t i : null_map_data) + { + if (i != 0) + return false; + } + result_column = result_column_nullable.getNestedColumnPtr(); + result_type = removeNullable(func->getResultType()); + } + } + out_column = result_column; + out_data_type = result_type; + + return true; } bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( @@ -1118,13 +1007,13 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( if (out_value.isNull()) return false; - return transformConstantWithValidFunctions( + MonotonicFunctionsChain transform_functions; + auto can_transform_constant = extractMonotonicFunctionsChainFromKey( node.getTreeContext().getQueryContext(), expr_name, out_key_column_num, out_key_column_type, - out_value, - out_type, + transform_functions, [](const IFunctionBase & func, const IDataType & type) { if (!func.hasInformationAboutMonotonicity()) @@ -1138,6 +1027,27 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( } return true; }); + + if (!can_transform_constant) + return false; + + auto const_column = out_type->createColumnConst(1, out_value); + + ColumnPtr transformed_const_column; + DataTypePtr transformed_const_type; + bool constant_transformed = applyFunctionChainToColumn( + const_column, + out_type, + transform_functions, + transformed_const_column, + transformed_const_type); + + if (!constant_transformed) + return false; + + out_value = (*transformed_const_column)[0]; + out_type = transformed_const_type; + return true; } /// Looking for possible transformation of `column = constant` into `partition_expr = function(constant)` @@ -1173,28 +1083,48 @@ bool KeyCondition::canConstantBeWrappedByFunctions( if (out_value.isNull()) return false; - return transformConstantWithValidFunctions( + MonotonicFunctionsChain transform_functions; + auto can_transform_constant = extractMonotonicFunctionsChainFromKey( node.getTreeContext().getQueryContext(), expr_name, out_key_column_num, out_key_column_type, - out_value, + transform_functions, + [](const IFunctionBase & func, const IDataType &) { return func.isDeterministic(); }); + + if (!can_transform_constant) + return false; + + auto const_column = out_type->createColumnConst(1, out_value); + + ColumnPtr transformed_const_column; + DataTypePtr transformed_const_type; + bool constant_transformed = applyFunctionChainToColumn( + const_column, out_type, - [](const IFunctionBase & func, const IDataType &) - { - return func.isDeterministic(); - }); + transform_functions, + transformed_const_column, + transformed_const_type); + + if (!constant_transformed) + return false; + + out_value = (*transformed_const_column)[0]; + out_type = transformed_const_type; + return true; } bool KeyCondition::tryPrepareSetIndex( const RPNBuilderFunctionTreeNode & func, RPNElement & out, - size_t & out_key_column_num) + size_t & out_key_column_num, + bool & is_constant_transformed) { const auto & left_arg = func.getArgumentAt(0); out_key_column_num = 0; std::vector indexes_mapping; + std::vector set_transforming_chains; DataTypes data_types; auto get_key_tuple_position_mapping = [&](const RPNBuilderTreeNode & node, size_t tuple_index) @@ -1203,6 +1133,7 @@ bool KeyCondition::tryPrepareSetIndex( index_mapping.tuple_index = tuple_index; DataTypePtr data_type; std::optional key_space_filling_curve_argument_pos; + MonotonicFunctionsChain set_transforming_chain; if (isKeyPossiblyWrappedByMonotonicFunctions( node, index_mapping.key_index, key_space_filling_curve_argument_pos, data_type, index_mapping.functions) && !key_space_filling_curve_argument_pos) /// We don't support the analysis of space-filling curves and IN set. @@ -1210,6 +1141,15 @@ bool KeyCondition::tryPrepareSetIndex( indexes_mapping.push_back(index_mapping); data_types.push_back(data_type); out_key_column_num = std::max(out_key_column_num, index_mapping.key_index); + set_transforming_chains.push_back(set_transforming_chain); + } + // For partition index, checking if set can be transformed to prune any partitions + else if (single_point && canSetValuesBeWrappedByFunctions(node, index_mapping.key_index, data_type, set_transforming_chain)) + { + indexes_mapping.push_back(index_mapping); + data_types.push_back(data_type); + out_key_column_num = std::max(out_key_column_num, index_mapping.key_index); + set_transforming_chains.push_back(set_transforming_chain); } }; @@ -1275,6 +1215,23 @@ bool KeyCondition::tryPrepareSetIndex( auto set_element_type = set_types[set_element_index]; auto set_column = set_columns[set_element_index]; + if (!set_transforming_chains[indexes_mapping_index].empty()) + { + ColumnPtr transformed_set_column; + DataTypePtr transformed_set_type; + if (!applyFunctionChainToColumn( + set_column, + set_element_type, + set_transforming_chains[indexes_mapping_index], + transformed_set_column, + transformed_set_type)) + return false; + + set_column = transformed_set_column; + set_element_type = transformed_set_type; + is_constant_transformed = true; + } + if (canBeSafelyCasted(set_element_type, key_column_type)) { set_columns[set_element_index] = castColumn({set_column, set_element_type, {}}, key_column_type); @@ -1571,6 +1528,191 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl( return false; } +/** When table's key has expression with these functions from a column, + * and when a column in a query is compared with a constant, such as: + * CREATE TABLE (x String) ORDER BY toDate(x) + * SELECT ... WHERE x LIKE 'Hello%' + * we want to apply the function to the constant for index analysis, + * but should modify it to pass on un-parsable values. + */ +static std::set date_time_parsing_functions = { + "toDate", + "toDate32", + "toDateTime", + "toDateTime64", + "parseDateTimeBestEffort", + "parseDateTimeBestEffortUS", + "parseDateTime32BestEffort", + "parseDateTime64BestEffort", + "parseDateTime", + "parseDateTimeInJodaSyntax", +}; + +/** The key functional expression constraint may be inferred from a plain column in the expression. + * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, + * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())` + * condition also holds, so the index may be used to select only parts satisfying this condition. + * + * To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the + * inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`). + * Instead, we can qualify only functions that do not transform the range (for example rounding), + * which while not strictly monotonic, are monotonic everywhere on the input range. + */ +bool KeyCondition::extractMonotonicFunctionsChainFromKey( + ContextPtr context, + const String & expr_name, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, + MonotonicFunctionsChain & out_functions_chain, + std::function always_monotonic) const +{ + const auto & sample_block = key_expr->getSampleBlock(); + + for (const auto & node : key_expr->getNodes()) + { + auto it = key_columns.find(node.result_name); + if (it != key_columns.end()) + { + std::stack chain; + + const auto * cur_node = &node; + bool is_valid_chain = true; + + while (is_valid_chain) + { + if (cur_node->result_name == expr_name) + break; + + chain.push(cur_node); + + if (cur_node->type == ActionsDAG::ActionType::FUNCTION && cur_node->children.size() <= 2) + { + is_valid_chain = always_monotonic(*cur_node->function_base, *cur_node->result_type); + + const ActionsDAG::Node * next_node = nullptr; + for (const auto * arg : cur_node->children) + { + if (arg->column && isColumnConst(*arg->column)) + continue; + + if (next_node) + is_valid_chain = false; + + next_node = arg; + } + + if (!next_node) + is_valid_chain = false; + + cur_node = next_node; + } + else if (cur_node->type == ActionsDAG::ActionType::ALIAS) + cur_node = cur_node->children.front(); + else + is_valid_chain = false; + } + + if (is_valid_chain) + { + while (!chain.empty()) + { + const auto * func = chain.top(); + chain.pop(); + + if (func->type != ActionsDAG::ActionType::FUNCTION) + continue; + + auto func_name = func->function_base->getName(); + auto func_base = func->function_base; + + ColumnsWithTypeAndName arguments; + ColumnWithTypeAndName const_arg; + FunctionWithOptionalConstArg::Kind kind = FunctionWithOptionalConstArg::Kind::NO_CONST; + + if (date_time_parsing_functions.contains(func_name)) + { + const auto & arg_types = func_base->getArgumentTypes(); + if (!arg_types.empty() && isStringOrFixedString(arg_types[0])) + { + func_name = func_name + "OrNull"; + } + + } + + auto func_builder = FunctionFactory::instance().tryGet(func_name, context); + + if (func->children.size() == 1) + { + arguments.push_back({nullptr, removeLowCardinality(func->children[0]->result_type), ""}); + } + else if (func->children.size() == 2) + { + const auto * left = func->children[0]; + const auto * right = func->children[1]; + if (left->column && isColumnConst(*left->column)) + { + const_arg = {left->result_type->createColumnConst(0, (*left->column)[0]), left->result_type, ""}; + arguments.push_back(const_arg); + arguments.push_back({nullptr, removeLowCardinality(right->result_type), ""}); + kind = FunctionWithOptionalConstArg::Kind::LEFT_CONST; + } + else + { + const_arg = {right->result_type->createColumnConst(0, (*right->column)[0]), right->result_type, ""}; + arguments.push_back({nullptr, removeLowCardinality(left->result_type), ""}); + arguments.push_back(const_arg); + kind = FunctionWithOptionalConstArg::Kind::RIGHT_CONST; + } + } + + auto out_func = func_builder->build(arguments); + if (kind == FunctionWithOptionalConstArg::Kind::NO_CONST) + out_functions_chain.push_back(out_func); + else + out_functions_chain.push_back(std::make_shared(out_func, const_arg, kind)); + } + + out_key_column_num = it->second; + out_key_column_type = sample_block.getByName(it->first).type; + return true; + } + } + } + + return false; +} + +bool KeyCondition::canSetValuesBeWrappedByFunctions( + const RPNBuilderTreeNode & node, + size_t & out_key_column_num, + DataTypePtr & out_key_res_column_type, + MonotonicFunctionsChain & out_functions_chain) +{ + // Checking if column name matches any of key subexpressions + String expr_name = node.getColumnName(); + + if (array_joined_column_names.contains(expr_name)) + return false; + + if (!key_subexpr_names.contains(expr_name)) + { + expr_name = node.getColumnNameWithModuloLegacy(); + + if (!key_subexpr_names.contains(expr_name)) + return false; + } + + return extractMonotonicFunctionsChainFromKey( + node.getTreeContext().getQueryContext(), + expr_name, + out_key_column_num, + out_key_res_column_type, + out_functions_chain, + [](const IFunctionBase & func, const IDataType &) + { + return func.isDeterministic(); + }); +} static void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const String & node_column_name) { @@ -1649,7 +1791,7 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme if (functionIsInOrGlobalInOperator(func_name)) { - if (tryPrepareSetIndex(func, out, key_column_num)) + if (tryPrepareSetIndex(func, out, key_column_num, is_constant_transformed)) { key_arg_pos = 0; is_set_const = true; diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 6e5956706aa..9e2218d7a29 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -14,6 +14,7 @@ #include #include +#include "DataTypes/Serializations/ISerialization.h" namespace DB @@ -253,13 +254,12 @@ private: DataTypePtr & out_key_column_type, std::vector & out_functions_chain); - bool transformConstantWithValidFunctions( + bool extractMonotonicFunctionsChainFromKey( ContextPtr context, const String & expr_name, size_t & out_key_column_num, DataTypePtr & out_key_column_type, - Field & out_value, - DataTypePtr & out_type, + MonotonicFunctionsChain & out_functions_chain, std::function always_monotonic) const; bool canConstantBeWrappedByMonotonicFunctions( @@ -276,13 +276,25 @@ private: Field & out_value, DataTypePtr & out_type); + /// Checks if node is a subexpression of any of key columns expressions, + /// wrapped by deterministic functions, and if so, returns `true`, and + /// specifies key column position / type. Besides that it produces the + /// chain of functions which should be executed on set, to transform it + /// into key column values. + bool canSetValuesBeWrappedByFunctions( + const RPNBuilderTreeNode & node, + size_t & out_key_column_num, + DataTypePtr & out_key_res_column_type, + MonotonicFunctionsChain & out_functions_chain); + /// If it's possible to make an RPNElement /// that will filter values (possibly tuples) by the content of 'prepared_set', /// do it and return true. bool tryPrepareSetIndex( const RPNBuilderFunctionTreeNode & func, RPNElement & out, - size_t & out_key_column_num); + size_t & out_key_column_num, + bool & is_constant_transformed); /// Checks that the index can not be used. /// diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 79efb0ca8b3..4c947487d21 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 7ab8fa2430a..fc64fae9a58 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -16,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -422,6 +424,16 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() return false; } +bool MergeTask::enabledBlockNumberColumn(GlobalRuntimeContextPtr global_ctx) +{ + return global_ctx->data->getSettings()->enable_block_number_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty(); +} + +bool MergeTask::enabledBlockOffsetColumn(GlobalRuntimeContextPtr global_ctx) +{ + return global_ctx->data->getSettings()->enable_block_offset_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty(); +} + void MergeTask::addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const String & name, const DataTypePtr & type) { if (global_ctx->storage_columns.contains(name)) @@ -555,18 +567,18 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const if (!reread_buf) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot read temporary file {}", ctx->rows_sources_uncompressed_write_buf->getFileName()); - auto * reread_buffer_raw = dynamic_cast(reread_buf.get()); + auto * reread_buffer_raw = dynamic_cast(reread_buf.get()); if (!reread_buffer_raw) { const auto & reread_buf_ref = *reread_buf; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ReadBufferFromFile, but got {}", demangle(typeid(reread_buf_ref).name())); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ReadBufferFromFileBase, but got {}", demangle(typeid(reread_buf_ref).name())); } /// Move ownership from std::unique_ptr to std::unique_ptr for CompressedReadBufferFromFile. /// First, release ownership from unique_ptr to base type. reread_buf.release(); /// NOLINT(bugprone-unused-return-value,hicpp-ignored-remove-result): we already have the pointer value in `reread_buffer_raw` /// Then, move ownership to unique_ptr to concrete type. - std::unique_ptr reread_buffer_from_file(reread_buffer_raw); + std::unique_ptr reread_buffer_from_file(reread_buffer_raw); /// CompressedReadBufferFromFile expects std::unique_ptr as argument. ctx->rows_sources_read_buf = std::make_unique(std::move(reread_buffer_from_file)); diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 56909d1b7a0..8b0f2130e8e 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -408,15 +408,8 @@ private: Stages::const_iterator stages_iterator = stages.begin(); - static bool enabledBlockNumberColumn(GlobalRuntimeContextPtr global_ctx) - { - return global_ctx->data->getSettings()->enable_block_number_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty(); - } - - static bool enabledBlockOffsetColumn(GlobalRuntimeContextPtr global_ctx) - { - return global_ctx->data->getSettings()->enable_block_offset_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty(); - } + static bool enabledBlockNumberColumn(GlobalRuntimeContextPtr global_ctx); + static bool enabledBlockOffsetColumn(GlobalRuntimeContextPtr global_ctx); static void addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const String & name, const DataTypePtr & type); }; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 16758fa12e0..78a551591a6 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,7 @@ #include #include #include +#include #include #include #include @@ -87,6 +89,7 @@ #include #include +#include #include #include @@ -473,7 +476,7 @@ StoragePolicyPtr MergeTreeData::getStoragePolicy() const ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByPredicate( const StorageSnapshotPtr & storage_snapshot, const ActionsDAGPtr & filter_dag, ContextPtr local_context) const { - if (!local_context->getSettings().allow_statistics_optimize) + if (!local_context->getSettingsRef().allow_statistics_optimize) return {}; const auto & parts = assert_cast(*storage_snapshot->data).parts; @@ -2464,7 +2467,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) } /// Grab only parts that are not used by anyone (SELECTs for example). - if (!part.unique()) + if (!isSharedPtrUnique(part)) { part->removal_state.store(DataPartRemovalState::NON_UNIQUE_OWNERSHIP, std::memory_order_relaxed); skipped_parts.push_back(part->info); @@ -4360,13 +4363,13 @@ bool MergeTreeData::tryRemovePartImmediately(DataPartPtr && part) part.reset(); - if (!((*it)->getState() == DataPartState::Outdated && it->unique())) + if (!((*it)->getState() == DataPartState::Outdated && isSharedPtrUnique(*it))) { if ((*it)->getState() != DataPartState::Outdated) LOG_WARNING(log, "Cannot immediately remove part {} because it's not in Outdated state " "usage counter {}", part_name_with_state, it->use_count()); - if (!it->unique()) + if (!isSharedPtrUnique(*it)) LOG_WARNING(log, "Cannot immediately remove part {} because someone using it right now " "usage counter {}", part_name_with_state, it->use_count()); return false; @@ -4432,7 +4435,7 @@ size_t MergeTreeData::getNumberOfOutdatedPartsWithExpiredRemovalTime() const for (const auto & part : outdated_parts_range) { auto part_remove_time = part->remove_time.load(std::memory_order_relaxed); - if (part_remove_time <= time_now && time_now - part_remove_time >= getSettings()->old_parts_lifetime.totalSeconds() && part.unique()) + if (part_remove_time <= time_now && time_now - part_remove_time >= getSettings()->old_parts_lifetime.totalSeconds() && isSharedPtrUnique(part)) ++res; } @@ -6172,6 +6175,11 @@ bool MergeTreeData::hasProjection() const return false; } +bool MergeTreeData::areAsynchronousInsertsEnabled() const +{ + return getSettings()->async_insert; +} + MergeTreeData::ProjectionPartsVector MergeTreeData::getAllProjectionPartsVector(MergeTreeData::DataPartStateVector * out_states) const { ProjectionPartsVector res; @@ -7077,6 +7085,20 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( /// with new analyzer, Planner make decision regarding parallel replicas usage, and so about processing stage on reading if (!query_context->getSettingsRef().allow_experimental_analyzer) { + const auto & settings = query_context->getSettingsRef(); + if (query_context->canUseParallelReplicasCustomKey()) + { + if (query_context->getClientInfo().distributed_depth > 0) + return QueryProcessingStage::FetchColumns; + + if (!supportsReplication() && !settings.parallel_replicas_for_non_replicated_merge_tree) + return QueryProcessingStage::Enum::FetchColumns; + + if (to_stage >= QueryProcessingStage::WithMergeableState + && query_context->canUseParallelReplicasCustomKeyForCluster(*query_context->getClusterForParallelReplicas())) + return QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; + } + if (query_context->getClientInfo().collaborate_with_initiator) return QueryProcessingStage::Enum::FetchColumns; @@ -7088,7 +7110,7 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( return QueryProcessingStage::Enum::WithMergeableState; /// For non-replicated MergeTree we allow them only if parallel_replicas_for_non_replicated_merge_tree is enabled - if (query_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) + if (settings.parallel_replicas_for_non_replicated_merge_tree) return QueryProcessingStage::Enum::WithMergeableState; } } @@ -7379,6 +7401,13 @@ std::pair MergeTreeData::cloneAn return std::make_pair(dst_data_part, std::move(temporary_directory_lock)); } +bool MergeTreeData::canUseAdaptiveGranularity() const +{ + const auto settings = getSettings(); + return settings->index_granularity_bytes != 0 + && (settings->enable_mixed_granularity_parts || !has_non_adaptive_index_granularity_parts); +} + String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const { return disk->getPath() + relative_data_path; @@ -8608,6 +8637,38 @@ void MergeTreeData::unloadPrimaryKeys() } } +size_t MergeTreeData::unloadPrimaryKeysOfOutdatedParts() +{ + /// If the method is already called from another thread, then we don't need to do anything. + std::unique_lock lock(unload_primary_key_mutex, std::defer_lock); + if (!lock.try_lock()) + return 0; + + DataPartsVector parts_to_unload_index; + + { + auto parts_lock = lockParts(); + auto parts_range = getDataPartsStateRange(DataPartState::Outdated); + + for (const auto & part : parts_range) + { + /// Outdated part may be hold by SELECT query and still needs the index. + /// This check requires lock of index_mutex but if outdated part is unique then there is no + /// contention on it, so it's relatively cheap and it's ok to check under a global parts lock. + if (isSharedPtrUnique(part) && part->isIndexLoaded()) + parts_to_unload_index.push_back(part); + } + } + + for (const auto & part : parts_to_unload_index) + { + const_cast(*part).unloadIndex(); + LOG_TEST(log, "Unloaded primary key for outdated part {}", part->name); + } + + return parts_to_unload_index.size(); +} + void MergeTreeData::verifySortingKey(const KeyDescription & sorting_key) { /// Aggregate functions already forbidden, but SimpleAggregateFunction are not diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index ba227a00506..7076b680521 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -65,6 +64,7 @@ using BackupEntries = std::vector; +struct MergeTreeSettings; struct WriteSettings; /// Auxiliary struct holding information about the future merged or mutated part. @@ -441,7 +441,7 @@ public: bool hasProjection() const override; - bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; } + bool areAsynchronousInsertsEnabled() const override; bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override; @@ -860,12 +860,7 @@ public: /// Returns true if table can create new parts with adaptive granularity /// Has additional constraint in replicated version - virtual bool canUseAdaptiveGranularity() const - { - const auto settings = getSettings(); - return settings->index_granularity_bytes != 0 && - (settings->enable_mixed_granularity_parts || !has_non_adaptive_index_granularity_parts); - } + virtual bool canUseAdaptiveGranularity() const; /// Get constant pointer to storage settings. /// Copy this pointer into your scope and you will @@ -1096,8 +1091,13 @@ public: static VirtualColumnsDescription createVirtuals(const StorageInMemoryMetadata & metadata); + /// Unloads primary keys of all parts. void unloadPrimaryKeys(); + /// Unloads primary keys of outdated parts that are not used by any query. + /// Returns the number of parts for which index was unloaded. + size_t unloadPrimaryKeysOfOutdatedParts(); + protected: friend class IMergeTreeDataPart; friend class MergeTreeDataMergerMutator; @@ -1260,6 +1260,8 @@ protected: std::mutex grab_old_parts_mutex; /// The same for clearOldTemporaryDirectories. std::mutex clear_old_temporary_directories_mutex; + /// The same for unloadPrimaryKeysOfOutdatedParts. + std::mutex unload_primary_key_mutex; void checkProperties( const StorageInMemoryMetadata & new_metadata, @@ -1728,14 +1730,6 @@ struct CurrentlySubmergingEmergingTagger ~CurrentlySubmergingEmergingTagger(); }; - -/// TODO: move it somewhere -[[ maybe_unused ]] static bool needSyncPart(size_t input_rows, size_t input_bytes, const MergeTreeSettings & settings) -{ - return ((settings.min_rows_to_fsync_after_merge && input_rows >= settings.min_rows_to_fsync_after_merge) - || (settings.min_compressed_bytes_to_fsync_after_merge && input_bytes >= settings.min_compressed_bytes_to_fsync_after_merge)); -} - /// Look at MutationCommands if it contains mutations for AlterConversions, update the counter. /// Return true if the counter had been updated bool updateAlterConversionsMutations(const MutationCommands & commands, std::atomic & alter_conversions_mutations, bool remove); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 2d49e1df19b..3c223b8d748 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 21d046c76f2..52d12c9db7d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -154,7 +154,8 @@ void writeColumnSingleGranule( const SerializationPtr & serialization, ISerialization::OutputStreamGetter stream_getter, size_t from_row, - size_t number_of_rows) + size_t number_of_rows, + const MergeTreeWriterSettings & settings) { ISerialization::SerializeBinaryBulkStatePtr state; ISerialization::SerializeBinaryBulkSettings serialize_settings; @@ -162,6 +163,7 @@ void writeColumnSingleGranule( serialize_settings.getter = stream_getter; serialize_settings.position_independent_encoding = true; serialize_settings.low_cardinality_max_dictionary_size = 0; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX; serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state); @@ -259,7 +261,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G writeColumnSingleGranule( block.getByName(name_and_type->name), getSerialization(name_and_type->name), - stream_getter, granule.start_row, granule.rows_to_write); + stream_getter, granule.start_row, granule.rows_to_write, settings); /// Each type always have at least one substream prev_stream->hashing_buf.next(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index a576720294f..6dc7e649b06 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -254,6 +255,12 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() index_compressor_stream = std::make_unique(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size); index_source_hashing_stream = std::make_unique(*index_compressor_stream); } + + const auto & primary_key_types = metadata_snapshot->getPrimaryKey().data_types; + index_serializations.reserve(primary_key_types.size()); + + for (const auto & type : primary_key_types) + index_serializations.push_back(type->getDefaultSerialization()); } } @@ -299,22 +306,33 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() store = std::make_shared(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } + skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); skip_index_accumulated_marks.push_back(0); } } +void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row) +{ + chassert(index_block.columns() == index_serializations.size()); + auto & index_stream = compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream; + + for (size_t i = 0; i < index_block.columns(); ++i) + { + const auto & column = index_block.getByPosition(i).column; + + index_columns[i]->insertFrom(*column, row); + index_serializations[i]->serializeBinary(*column, row, index_stream, {}); + } +} + void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Block & primary_index_block, const Granules & granules_to_write) { - size_t primary_columns_num = primary_index_block.columns(); + if (!metadata_snapshot->hasPrimaryKey()) + return; + if (index_columns.empty()) - { - index_types = primary_index_block.getDataTypes(); - index_columns.resize(primary_columns_num); - last_block_index_columns.resize(primary_columns_num); - for (size_t i = 0; i < primary_columns_num; ++i) - index_columns[i] = primary_index_block.getByPosition(i).column->cloneEmpty(); - } + index_columns = primary_index_block.cloneEmptyColumns(); { /** While filling index (index_columns), disable memory tracker. @@ -328,22 +346,14 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc /// Write index. The index contains Primary Key value for each `index_granularity` row. for (const auto & granule : granules_to_write) { - if (metadata_snapshot->hasPrimaryKey() && granule.mark_on_start) - { - for (size_t j = 0; j < primary_columns_num; ++j) - { - const auto & primary_column = primary_index_block.getByPosition(j); - index_columns[j]->insertFrom(*primary_column.column, granule.start_row); - primary_column.type->getDefaultSerialization()->serializeBinary( - *primary_column.column, granule.start_row, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {}); - } - } + if (granule.mark_on_start) + calculateAndSerializePrimaryIndexRow(primary_index_block, granule.start_row); } } - /// store last index row to write final mark at the end of column - for (size_t j = 0; j < primary_columns_num; ++j) - last_block_index_columns[j] = primary_index_block.getByPosition(j).column; + /// Store block with last index row to write final mark at the end of column + if (with_final_mark) + last_index_block = primary_index_block; } void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block) @@ -420,17 +430,11 @@ void MergeTreeDataPartWriterOnDisk::fillPrimaryIndexChecksums(MergeTreeData::Dat if (index_file_hashing_stream) { - if (write_final_mark) + if (write_final_mark && last_index_block) { - for (size_t j = 0; j < index_columns.size(); ++j) - { - const auto & column = *last_block_index_columns[j]; - size_t last_row_number = column.size() - 1; - index_columns[j]->insertFrom(column, last_row_number); - index_types[j]->getDefaultSerialization()->serializeBinary( - column, last_row_number, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {}); - } - last_block_index_columns.clear(); + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + calculateAndSerializePrimaryIndexRow(last_index_block, last_index_block.rows() - 1); + last_index_block.clear(); } if (compress_primary_key) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index bdf0fdb7f32..8d84442981e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -173,10 +173,10 @@ protected: std::unique_ptr index_source_hashing_stream; bool compress_primary_key; - DataTypes index_types; - /// Index columns from the last block - /// It's written to index file in the `writeSuffixAndFinalizePart` method - Columns last_block_index_columns; + /// Last block with index columns. + /// It's written to index file in the `writeSuffixAndFinalizePart` method. + Block last_index_block; + Serializations index_serializations; bool data_written = false; @@ -193,6 +193,7 @@ private: void initStatistics(); virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0; + void calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row); struct ExecutionStatistics { diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 5ba326cef0c..3fbabe1dd52 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -433,6 +434,7 @@ void MergeTreeDataPartWriterWide::writeColumn( if (inserted) { ISerialization::SerializeBinaryBulkSettings serialize_settings; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second); } @@ -441,6 +443,7 @@ void MergeTreeDataPartWriterWide::writeColumn( serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; for (const auto & granule : granules) { @@ -558,7 +561,10 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai if (index_granularity_rows != index_granularity.getMarkRows(mark_num)) { - throw Exception( + /// With fixed granularity we can have last mark with less rows than granularity + const bool is_last_mark = (mark_num + 1 == index_granularity.getMarksCount()); + if (!index_granularity_info.fixed_index_granularity || !is_last_mark) + throw Exception( ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{}" " (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", @@ -627,6 +633,7 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(MergeTreeDataPartChecksums & ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; WrittenOffsetColumns offset_columns; if (rows_written_in_last_mark > 0) { @@ -821,7 +828,14 @@ void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_ /// Without offset rows_written_in_last_mark = 0; } + + if (compute_granularity) + { + index_granularity.popMark(); + index_granularity.appendMark(new_rows_in_last_mark); + } } + } } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 2e287ff3042..c5214f0f3d3 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,7 @@ #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 5c8aa32949d..73244b714bf 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -12,12 +12,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.cpp b/src/Storages/MergeTree/MergeTreeIOSettings.cpp new file mode 100644 index 00000000000..58c3bd28d6a --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIOSettings.cpp @@ -0,0 +1,37 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +MergeTreeWriterSettings::MergeTreeWriterSettings( + const Settings & global_settings, + const WriteSettings & query_write_settings_, + const MergeTreeSettingsPtr & storage_settings, + bool can_use_adaptive_granularity_, + bool rewrite_primary_key_, + bool blocks_are_granules_size_) + : min_compress_block_size( + storage_settings->min_compress_block_size ? storage_settings->min_compress_block_size : global_settings.min_compress_block_size) + , max_compress_block_size( + storage_settings->max_compress_block_size ? storage_settings->max_compress_block_size : global_settings.max_compress_block_size) + , marks_compression_codec(storage_settings->marks_compression_codec) + , marks_compress_block_size(storage_settings->marks_compress_block_size) + , compress_primary_key(storage_settings->compress_primary_key) + , primary_key_compression_codec(storage_settings->primary_key_compression_codec) + , primary_key_compress_block_size(storage_settings->primary_key_compress_block_size) + , can_use_adaptive_granularity(can_use_adaptive_granularity_) + , rewrite_primary_key(rewrite_primary_key_) + , blocks_are_granules_size(blocks_are_granules_size_) + , query_write_settings(query_write_settings_) + , max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation) + , low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size) + , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part != 0) + , use_compact_variant_discriminators_serialization(storage_settings->use_compact_variant_discriminators_serialization) +{ +} + +} diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index a9125b4047e..c79ca1e66ee 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -1,15 +1,17 @@ #pragma once #include -#include -#include -#include #include #include +#include namespace DB { +struct MergeTreeSettings; +using MergeTreeSettingsPtr = std::shared_ptr; +struct Settings; + class MMappedFileCache; using MMappedFileCachePtr = std::shared_ptr; @@ -58,26 +60,7 @@ struct MergeTreeWriterSettings const MergeTreeSettingsPtr & storage_settings, bool can_use_adaptive_granularity_, bool rewrite_primary_key_, - bool blocks_are_granules_size_ = false) - : min_compress_block_size( - storage_settings->min_compress_block_size ? storage_settings->min_compress_block_size : global_settings.min_compress_block_size) - , max_compress_block_size( - storage_settings->max_compress_block_size ? storage_settings->max_compress_block_size - : global_settings.max_compress_block_size) - , marks_compression_codec(storage_settings->marks_compression_codec) - , marks_compress_block_size(storage_settings->marks_compress_block_size) - , compress_primary_key(storage_settings->compress_primary_key) - , primary_key_compression_codec(storage_settings->primary_key_compression_codec) - , primary_key_compress_block_size(storage_settings->primary_key_compress_block_size) - , can_use_adaptive_granularity(can_use_adaptive_granularity_) - , rewrite_primary_key(rewrite_primary_key_) - , blocks_are_granules_size(blocks_are_granules_size_) - , query_write_settings(query_write_settings_) - , max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation) - , low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size) - , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part != 0) - { - } + bool blocks_are_granules_size_ = false); size_t min_compress_block_size; size_t max_compress_block_size; @@ -98,6 +81,7 @@ struct MergeTreeWriterSettings size_t low_cardinality_max_dictionary_size; bool low_cardinality_use_single_dictionary_for_part; + bool use_compact_variant_discriminators_serialization; }; } diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index e492ca0aec2..c66aa2373e7 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -230,7 +231,7 @@ MergeTreeIndexConditionAnnoy::MergeTreeIndexConditionAnnoy( ContextPtr context) : ann_condition(query, context) , distance_function(distance_function_) - , search_k(context->getSettings().annoy_index_search_k_nodes) + , search_k(context->getSettingsRef().annoy_index_search_k_nodes) {} bool MergeTreeIndexConditionAnnoy::mayBeTrueOnGranule(MergeTreeIndexGranulePtr /*idx_granule*/) const diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp index 62bb31dff68..067a692a3b5 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp @@ -1,5 +1,6 @@ -#include #include +#include +#include namespace fs = std::filesystem; diff --git a/src/Storages/MergeTree/MergeTreePartInfo.h b/src/Storages/MergeTree/MergeTreePartInfo.h index 9bb79e21144..f128722b03b 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/src/Storages/MergeTree/MergeTreePartInfo.h @@ -101,9 +101,8 @@ struct MergeTreePartInfo bool isFakeDropRangePart() const { - /// Another max level was previously used for REPLACE/MOVE PARTITION - auto another_max_level = std::numeric_limits::max(); - return level == MergeTreePartInfo::MAX_LEVEL || level == another_max_level; + /// LEGACY_MAX_LEVEL was previously used for REPLACE/MOVE PARTITION + return level == MergeTreePartInfo::MAX_LEVEL || level == MergeTreePartInfo::LEGACY_MAX_LEVEL; } String getPartNameAndCheckFormat(MergeTreeDataFormatVersion format_version) const; diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index 1db70162bff..9223d6fd5b1 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -1,5 +1,6 @@ -#include #include +#include +#include #include #include diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 2c249f7b63b..b6f626c992b 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace ProfileEvents diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index dc1ba030f45..1daf46b6e25 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -335,4 +336,12 @@ void MergeTreeReadPool::fillPerThreadInfo(size_t threads, size_t sum_marks) } } +MergeTreeReadPool::BackoffSettings::BackoffSettings(const DB::Settings& settings) + : min_read_latency_ms(settings.read_backoff_min_latency_ms.totalMilliseconds()), + max_throughput(settings.read_backoff_max_throughput), + min_interval_between_events_ms(settings.read_backoff_min_interval_between_events_ms.totalMilliseconds()), + min_events(settings.read_backoff_min_events), + min_concurrency(settings.read_backoff_min_concurrency) +{} + } diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index cb0e8a9657f..cd12b9db9fd 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -64,12 +64,7 @@ public: size_t min_concurrency = 1; /// Constants above is just an example. - explicit BackoffSettings(const Settings & settings) - : min_read_latency_ms(settings.read_backoff_min_latency_ms.totalMilliseconds()), - max_throughput(settings.read_backoff_max_throughput), - min_interval_between_events_ms(settings.read_backoff_min_interval_between_events_ms.totalMilliseconds()), - min_events(settings.read_backoff_min_events), - min_concurrency(settings.read_backoff_min_concurrency) {} + explicit BackoffSettings(const Settings & settings); BackoffSettings() : min_read_latency_ms(0) {} }; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 02f8d6f4f6a..ecf7ac08294 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 5d6f08d3c53..c968ad84936 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -115,6 +115,16 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def, ContextPtr conte #undef ADD_IF_ABSENT } +bool MergeTreeSettings::isReadonlySetting(const String & name) +{ + return name == "index_granularity" || name == "index_granularity_bytes" || name == "enable_mixed_granularity_parts"; +} + +bool MergeTreeSettings::isPartFormatSetting(const String & name) +{ + return name == "min_bytes_for_wide_part" || name == "min_rows_for_wide_part"; +} + void MergeTreeSettings::sanityCheck(size_t background_pool_tasks) const { if (number_of_free_entries_in_pool_to_execute_mutation > background_pool_tasks) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 1f8d6abebd2..f5ada81cf55 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -43,6 +43,7 @@ struct Settings; M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \ + M(Bool, use_compact_variant_discriminators_serialization, true, "Use compact version of Variant discriminators serialization.", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ @@ -267,17 +268,8 @@ struct MergeTreeSettings : public BaseSettings, public /// NOTE: will rewrite the AST to add immutable settings. void loadFromQuery(ASTStorage & storage_def, ContextPtr context, bool is_attach); - /// We check settings after storage creation - static bool isReadonlySetting(const String & name) - { - return name == "index_granularity" || name == "index_granularity_bytes" - || name == "enable_mixed_granularity_parts"; - } - - static bool isPartFormatSetting(const String & name) - { - return name == "min_bytes_for_wide_part" || name == "min_rows_for_wide_part"; - } + static bool isReadonlySetting(const String & name); + static bool isPartFormatSetting(const String & name); /// Check that the values are sane taking also query-level settings into account. void sanityCheck(size_t background_pool_tasks) const; @@ -294,4 +286,10 @@ namespace MergeTreeColumnSettings void validate(const SettingsChanges & changes); } +[[maybe_unused]] static bool needSyncPart(size_t input_rows, size_t input_bytes, const MergeTreeSettings & settings) +{ + return ( + (settings.min_rows_to_fsync_after_merge && input_rows >= settings.min_rows_to_fsync_after_merge) + || (settings.min_compressed_bytes_to_fsync_after_merge && input_bytes >= settings.min_compressed_bytes_to_fsync_after_merge)); +} } diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 05751e0fa6f..5c71994d68d 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace ProfileEvents { diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp b/src/Storages/MergeTree/MergeTreeSource.cpp index fcf2dd76e3f..e323b9f9ee7 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -133,9 +133,8 @@ private: }; #endif -MergeTreeSource::MergeTreeSource(MergeTreeSelectProcessorPtr processor_) - : ISource(processor_->getHeader()) - , processor(std::move(processor_)) +MergeTreeSource::MergeTreeSource(MergeTreeSelectProcessorPtr processor_, const std::string & log_name_) + : ISource(processor_->getHeader()), processor(std::move(processor_)), log_name(log_name_) { #if defined(OS_LINUX) if (processor->getSettings().use_asynchronous_read_from_pool) @@ -207,7 +206,7 @@ std::optional MergeTreeSource::tryGenerate() try { - OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; + OpenTelemetry::SpanHolder span{fmt::format("MergeTreeSource({})::tryGenerate", log_name)}; holder->setResult(processor->read()); } catch (...) @@ -222,7 +221,7 @@ std::optional MergeTreeSource::tryGenerate() } #endif - OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; + OpenTelemetry::SpanHolder span{fmt::format("MergeTreeSource({})::tryGenerate", log_name)}; return processReadResult(processor->read()); } diff --git a/src/Storages/MergeTree/MergeTreeSource.h b/src/Storages/MergeTree/MergeTreeSource.h index 655f0ee6ebe..fc39b4f9b09 100644 --- a/src/Storages/MergeTree/MergeTreeSource.h +++ b/src/Storages/MergeTree/MergeTreeSource.h @@ -12,7 +12,7 @@ struct ChunkAndProgress; class MergeTreeSource final : public ISource { public: - explicit MergeTreeSource(MergeTreeSelectProcessorPtr processor_); + explicit MergeTreeSource(MergeTreeSelectProcessorPtr processor_, const std::string & log_name_); ~MergeTreeSource() override; std::string getName() const override; @@ -30,6 +30,7 @@ protected: private: MergeTreeSelectProcessorPtr processor; + const std::string log_name; #if defined(OS_LINUX) struct AsyncReadingState; diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index a9a5fddace4..e7b3585ecda 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 164658c914e..38869aebaa5 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -93,6 +94,7 @@ struct MergedBlockOutputStream::Finalizer::Impl void MergedBlockOutputStream::Finalizer::finish() { std::unique_ptr to_finish = std::move(impl); + impl.reset(); if (to_finish) to_finish->finish(); } @@ -130,7 +132,19 @@ MergedBlockOutputStream::Finalizer::Finalizer(Finalizer &&) noexcept = default; MergedBlockOutputStream::Finalizer & MergedBlockOutputStream::Finalizer::operator=(Finalizer &&) noexcept = default; MergedBlockOutputStream::Finalizer::Finalizer(std::unique_ptr impl_) : impl(std::move(impl_)) {} -MergedBlockOutputStream::Finalizer::~Finalizer() = default; +MergedBlockOutputStream::Finalizer::~Finalizer() +{ + try + { + if (impl) + finish(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + void MergedBlockOutputStream::finalizePart( const MergeTreeMutableDataPartPtr & new_part, diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 5ae6517a236..c167ac87317 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 4c96cbf2c97..73084f487b9 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 20f387137e7..9aec074deae 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a552ee89aee..f74cccd518b 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1,8 +1,10 @@ #include +#include #include #include #include +#include #include #include #include @@ -19,11 +21,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 5a84c6fd684..f46b4de10b7 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -444,6 +444,9 @@ void DefaultCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRangesAn ErrorCodes::LOGICAL_ERROR, "Replica number ({}) is bigger than total replicas count ({})", replica_num, stats.size()); ++stats[replica_num].number_of_requests; + + if (replica_status[replica_num].is_announcement_received) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Duplicate announcement received for replica number {}", replica_num); replica_status[replica_num].is_announcement_received = true; LOG_DEBUG(log, "Sent initial requests: {} Replicas count: {}", sent_initial_requests, replicas_count); diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 4228d7b70b6..10c7bef72fc 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index 4a18d606bb7..93087b0d3fe 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp index 2fc5238827d..24929365b72 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp @@ -1,8 +1,9 @@ #include -#include #include +#include #include +#include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index 336d19692d4..6e22a3515bc 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index e034918ef57..7aef249c366 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -1,13 +1,14 @@ +#include +#include #include #include -#include -#include #include #include #include #include +#include namespace DB @@ -175,6 +176,8 @@ Float32 ReplicatedMergeTreeCleanupThread::iterate() cleaned_part_like += storage.clearEmptyParts(); } + cleaned_part_like += storage.unloadPrimaryKeysOfOutdatedParts(); + /// We need to measure the number of removed objects somehow (for better scheduling), /// but just summing the number of removed async blocks, logs, and empty parts does not make any sense. /// So we are trying to (approximately) measure the number of inserted blocks/parts, so we will be able to compare apples to apples. diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp index 192f0d23f96..3e64a4c7c52 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp @@ -1,6 +1,7 @@ +#include +#include #include #include -#include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index d7601e6e638..dc242a7b084 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 30ba95c46f0..627bda3f8bf 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 35f355d1d9b..268cead2a0e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 4b4f4c33e7d..83e8a7a3bc5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -9,8 +9,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -574,6 +576,16 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl } } +template +bool ReplicatedMergeTreeSinkImpl::lastBlockIsDuplicate() const +{ + /// If MV is responsible for deduplication, block is not considered duplicating. + if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) + return false; + + return last_block_is_duplicate; +} + template std::vector ReplicatedMergeTreeSinkImpl::detectConflictsInAsyncBlockIDs(const std::vector & ids) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 39623c20584..8a37d85a750 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -60,14 +60,7 @@ public: bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); /// For proper deduplication in MaterializedViews - bool lastBlockIsDuplicate() const override - { - /// If MV is responsible for deduplication, block is not considered duplicating. - if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) - return false; - - return last_block_is_duplicate; - } + bool lastBlockIsDuplicate() const override; struct DelayedChunk; private: diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 287a4d20543..c4bae5352cb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index a94508ad41f..82ebbf09988 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -1,25 +1,17 @@ #pragma once +#include #include #include #include #include -#include -#include -#include -#include #include -#include -#include namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} +class QueryPlan; /// A Storage that allows reading from a single MergeTree data part. class StorageFromMergeTreeDataPart final : public IStorage @@ -46,20 +38,7 @@ public: String getName() const override { return "FromMergeTreeDataPart"; } - StorageSnapshotPtr getStorageSnapshot( - const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const override - { - const auto & storage_columns = metadata_snapshot->getColumns(); - if (!hasDynamicSubcolumns(storage_columns)) - return std::make_shared(*this, metadata_snapshot); - - auto data_parts = storage.getDataPartsVectorForInternalUsage(); - - auto object_columns = getConcreteObjectColumns( - data_parts.begin(), data_parts.end(), storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); }); - - return std::make_shared(*this, metadata_snapshot, std::move(object_columns)); - } + StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const override; void read( QueryPlan & query_plan, @@ -69,21 +48,7 @@ public: ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - size_t num_streams) override - { - query_plan.addStep(MergeTreeDataSelectExecutor(storage) - .readFromParts( - parts, - alter_conversions, - column_names, - storage_snapshot, - query_info, - context, - max_block_size, - num_streams, - nullptr, - analysis_result_ptr)); - } + size_t num_streams) override; bool supportsPrewhere() const override { return true; } @@ -102,12 +67,7 @@ public: return storage.getPartitionIDFromQuery(ast, context); } - bool materializeTTLRecalculateOnly() const - { - if (parts.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "parts must not be empty for materializeTTLRecalculateOnly"); - return parts.front()->storage.getSettings()->materialize_ttl_recalculate_only; - } + bool materializeTTLRecalculateOnly() const; bool hasLightweightDeletedMask() const override { diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 525960d5314..774fd95ebc6 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index d234103e52b..3f0603f6900 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -1,11 +1,14 @@ #include #include #include +#include #include #include #include #include +#include +#include #include #include #include diff --git a/src/Storages/MySQL/MySQLSettings.cpp b/src/Storages/MySQL/MySQLSettings.cpp index fd53174f4f6..1c88e4a94e9 100644 --- a/src/Storages/MySQL/MySQLSettings.cpp +++ b/src/Storages/MySQL/MySQLSettings.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollectionsHelpers.cpp index ba90f21c907..b58aed573ae 100644 --- a/src/Storages/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollectionsHelpers.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -117,7 +118,7 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (asts.size() == 1) return collection_copy; - const auto allow_override_by_default = context->getSettings().allow_named_collection_override_by_default; + const auto allow_override_by_default = context->getSettingsRef().allow_named_collection_override_by_default; for (auto * it = std::next(asts.begin()); it != asts.end(); ++it) { @@ -162,7 +163,7 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); - const auto allow_override_by_default = context->getSettings().allow_named_collection_override_by_default; + const auto allow_override_by_default = context->getSettingsRef().allow_named_collection_override_by_default; for (const auto & key : keys) { if (collection_copy->isOverridable(key, allow_override_by_default)) diff --git a/src/Storages/NamedCollectionsHelpers.h b/src/Storages/NamedCollectionsHelpers.h index b4aea096c59..f444a581eb6 100644 --- a/src/Storages/NamedCollectionsHelpers.h +++ b/src/Storages/NamedCollectionsHelpers.h @@ -9,14 +9,14 @@ #include +namespace DB +{ + namespace ErrorCodes { extern const int BAD_ARGUMENTS; } -namespace DB -{ - /// Helper function to get named collection for table engine. /// Table engines have collection name as first argument of ast and other arguments are key-value overrides. MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp index f763a997bfb..f0a0a562b92 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -1,4 +1,5 @@ #include +#include #if USE_AZURE_BLOB_STORAGE @@ -13,6 +14,7 @@ #include #include #include +#include #include #include @@ -40,72 +42,19 @@ const std::unordered_set optional_configuration_keys = { "storage_account_url", }; -using AzureClient = Azure::Storage::Blobs::BlobContainerClient; -using AzureClientPtr = std::unique_ptr; - -namespace -{ - bool isConnectionString(const std::string & candidate) - { - return !candidate.starts_with("http"); - } - - template - bool containerExists(T & blob_service_client, const std::string & container_name) - { - Azure::Storage::Blobs::ListBlobContainersOptions options; - options.Prefix = container_name; - options.PageSizeHint = 1; - - auto containers_list_response = blob_service_client.ListBlobContainers(options); - auto containers_list = containers_list_response.BlobContainers; - - auto it = std::find_if( - containers_list.begin(), containers_list.end(), - [&](const auto & c) { return c.Name == container_name; }); - return it != containers_list.end(); - } -} - -Poco::URI StorageAzureConfiguration::getConnectionURL() const -{ - if (!is_connection_string) - return Poco::URI(connection_url); - - auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); - return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); -} - void StorageAzureConfiguration::check(ContextPtr context) const { - context->getGlobalContext()->getRemoteHostFilter().checkURL(getConnectionURL()); + auto url = Poco::URI(connection_params.getConnectionURL()); + context->getGlobalContext()->getRemoteHostFilter().checkURL(url); Configuration::check(context); } StorageAzureConfiguration::StorageAzureConfiguration(const StorageAzureConfiguration & other) : Configuration(other) { - connection_url = other.connection_url; - is_connection_string = other.is_connection_string; - account_name = other.account_name; - account_key = other.account_key; - container = other.container; blob_path = other.blob_path; blobs_paths = other.blobs_paths; -} - -AzureObjectStorage::SettingsPtr StorageAzureConfiguration::createSettings(ContextPtr context) -{ - const auto & context_settings = context->getSettingsRef(); - auto settings_ptr = std::make_unique(); - settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); - settings_ptr->strict_upload_part_size = context_settings.azure_strict_upload_part_size; - settings_ptr->max_upload_part_size = context_settings.azure_max_upload_part_size; - settings_ptr->max_blocks_in_multipart_upload = context_settings.azure_max_blocks_in_multipart_upload; - settings_ptr->min_upload_part_size = context_settings.azure_min_upload_part_size; - return settings_ptr; + connection_params = other.connection_params; } StorageObjectStorage::QuerySettings StorageAzureConfiguration::getQuerySettings(const ContextPtr & context) const @@ -126,174 +75,59 @@ StorageObjectStorage::QuerySettings StorageAzureConfiguration::getQuerySettings( ObjectStoragePtr StorageAzureConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { assertInitialized(); - auto client = createClient(is_readonly, /* attempt_to_create_container */true); - auto settings = createSettings(context); + + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); + auto client = AzureBlobStorage::getContainerClient(connection_params, is_readonly); + return std::make_unique( - "AzureBlobStorage", std::move(client), std::move(settings), container, getConnectionURL().toString()); + "AzureBlobStorage", + connection_params.createForContainer(), + std::move(settings), + connection_params.getContainer(), + connection_params.getConnectionURL()); } -AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool attempt_to_create_container) +static AzureBlobStorage::ConnectionParams getConnectionParams( + const String & connection_url, + const String & container_name, + const std::optional & account_name, + const std::optional & account_key, + const ContextPtr & local_context) { - using namespace Azure::Storage::Blobs; + AzureBlobStorage::ConnectionParams connection_params; + auto request_settings = AzureBlobStorage::getRequestSettings(local_context->getSettingsRef()); - AzureClientPtr result; - - if (is_connection_string) + if (account_name && account_key) { - auto managed_identity_credential = std::make_shared(); - auto blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); - result = std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_url, container)); - - if (attempt_to_create_container) - { - bool container_exists = containerExists(*blob_service_client, container); - if (!container_exists) - { - if (is_read_only) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "AzureBlobStorage container does not exist '{}'", - container); - - try - { - result->CreateIfNotExists(); - } - catch (const Azure::Storage::StorageException & e) - { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) - { - throw; - } - } - } - } + connection_params.endpoint.storage_account_url = connection_url; + connection_params.endpoint.container_name = container_name; + connection_params.auth_method = std::make_shared(*account_name, *account_key); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ false); } else { - std::shared_ptr storage_shared_key_credential; - if (account_name.has_value() && account_key.has_value()) - { - storage_shared_key_credential - = std::make_shared(*account_name, *account_key); - } - - std::unique_ptr blob_service_client; - size_t pos = connection_url.find('?'); - std::shared_ptr managed_identity_credential; - if (storage_shared_key_credential) - { - blob_service_client = std::make_unique(connection_url, storage_shared_key_credential); - } - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(connection_url, workload_identity_credential); - } - else - { - managed_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(connection_url, managed_identity_credential); - } - } - - std::string final_url; - if (pos != std::string::npos) - { - auto url_without_sas = connection_url.substr(0, pos); - final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + container - + connection_url.substr(pos); - } - else - final_url - = connection_url + (connection_url.back() == '/' ? "" : "/") + container; - - if (!attempt_to_create_container) - { - if (storage_shared_key_credential) - return std::make_unique(final_url, storage_shared_key_credential); - else - return std::make_unique(final_url, managed_identity_credential); - } - - bool container_exists = containerExists(*blob_service_client, container); - if (container_exists) - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - if (is_read_only) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "AzureBlobStorage container does not exist '{}'", - container); - try - { - result = std::make_unique(blob_service_client->CreateBlobContainer(container).Value); - } catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.") - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - throw; - } - } - } + AzureBlobStorage::processURL(connection_url, container_name, connection_params.endpoint, connection_params.auth_method); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ false); } - return result; + return connection_params; } - void StorageAzureConfiguration::fromNamedCollection(const NamedCollection & collection, ContextPtr) +void StorageAzureConfiguration::fromNamedCollection(const NamedCollection & collection, ContextPtr context) { validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + String connection_url; + String container_name; + std::optional account_name; + std::optional account_key; + if (collection.has("connection_string")) - { connection_url = collection.get("connection_string"); - is_connection_string = true; - } - - if (collection.has("storage_account_url")) - { + else if (collection.has("storage_account_url")) connection_url = collection.get("storage_account_url"); - is_connection_string = false; - } - container = collection.get("container"); + container_name = collection.get("container"); blob_path = collection.get("blob_path"); if (collection.has("account_name")) @@ -307,6 +141,7 @@ AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool a compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); blobs_paths = {blob_path}; + connection_params = getConnectionParams(connection_url, container_name, account_name, account_key, context); } void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, bool with_structure) @@ -324,12 +159,14 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, std::unordered_map engine_args_to_idx; - connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - is_connection_string = isConnectionString(connection_url); - container = checkAndGetLiteralArgument(engine_args[1], "container"); + String connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + String container_name = checkAndGetLiteralArgument(engine_args[1], "container"); blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); + std::optional account_name; + std::optional account_key; + auto is_format_arg = [] (const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(Poco::toLower(s)); @@ -386,7 +223,9 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (is_format_arg(sixth_arg)) + { format = sixth_arg; + } else { if (with_structure) @@ -428,6 +267,7 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, } blobs_paths = {blob_path}; + connection_params = getConnectionParams(connection_url, container_name, account_name, account_key, context); } void StorageAzureConfiguration::addStructureAndFormatToArgs( diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h index bbaa82c51ba..4e6bfbc0745 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -35,8 +35,8 @@ public: const Paths & getPaths() const override { return blobs_paths; } void setPaths(const Paths & paths) override { blobs_paths = paths; } - String getNamespace() const override { return container; } - String getDataSourceDescription() const override { return std::filesystem::path(connection_url) / container; } + String getNamespace() const override { return connection_params.getContainer(); } + String getDataSourceDescription() const override { return std::filesystem::path(connection_params.getConnectionURL()) / connection_params.getContainer(); } StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; @@ -54,22 +54,9 @@ protected: void fromNamedCollection(const NamedCollection & collection, ContextPtr context) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; - using AzureClient = Azure::Storage::Blobs::BlobContainerClient; - using AzureClientPtr = std::unique_ptr; - - std::string connection_url; - bool is_connection_string; - - std::optional account_name; - std::optional account_key; - - std::string container; std::string blob_path; std::vector blobs_paths; - - AzureClientPtr createClient(bool is_read_only, bool attempt_to_create_container); - AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); - Poco::URI getConnectionURL() const; + AzureBlobStorage::ConnectionParams connection_params; }; } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 38bf3112ee2..c896a760597 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -4,19 +4,43 @@ #include #if USE_AWS_S3 && USE_PARQUET -#include + +#include +#include +#include +#include +#include + +#include +#include +#include + #include #include #include -#include -#include -#include -#include -#include -#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include +#include #include -#include +#include +#include +#include + +namespace fs = std::filesystem; namespace DB { @@ -25,10 +49,14 @@ namespace ErrorCodes { extern const int INCORRECT_DATA; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } -struct DeltaLakeMetadata::Impl +struct DeltaLakeMetadataImpl { + using ConfigurationPtr = DeltaLakeMetadata::ConfigurationPtr; + ObjectStoragePtr object_storage; ConfigurationPtr configuration; ContextPtr context; @@ -37,7 +65,7 @@ struct DeltaLakeMetadata::Impl * Useful links: * - https://github.com/delta-io/delta/blob/master/PROTOCOL.md#data-files */ - Impl(ObjectStoragePtr object_storage_, + DeltaLakeMetadataImpl(ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, ContextPtr context_) : object_storage(object_storage_) @@ -74,10 +102,18 @@ struct DeltaLakeMetadata::Impl * An action changes one aspect of the table's state, for example, adding or removing a file. * Note: it is not a valid json, but a list of json's, so we read it in a while cycle. */ - std::set processMetadataFiles() + struct DeltaLakeMetadata + { + NamesAndTypesList schema; + Strings data_files; + DataLakePartitionColumns partition_columns; + }; + DeltaLakeMetadata processMetadataFiles() { std::set result_files; - const auto checkpoint_version = getCheckpointIfExists(result_files); + NamesAndTypesList current_schema; + DataLakePartitionColumns current_partition_columns; + const auto checkpoint_version = getCheckpointIfExists(result_files, current_schema, current_partition_columns); if (checkpoint_version) { @@ -90,7 +126,7 @@ struct DeltaLakeMetadata::Impl if (!object_storage->exists(StoredObject(file_path))) break; - processMetadataFile(file_path, result_files); + processMetadataFile(file_path, current_schema, current_partition_columns, result_files); } LOG_TRACE( @@ -101,10 +137,10 @@ struct DeltaLakeMetadata::Impl { const auto keys = listFiles(*object_storage, *configuration, deltalake_metadata_directory, metadata_file_suffix); for (const String & key : keys) - processMetadataFile(key, result_files); + processMetadataFile(key, current_schema, current_partition_columns, result_files); } - return result_files; + return DeltaLakeMetadata{current_schema, Strings(result_files.begin(), result_files.end()), current_partition_columns}; } /** @@ -136,10 +172,20 @@ struct DeltaLakeMetadata::Impl * \"nullCount\":{\"col-6c990940-59bb-4709-8f2e-17083a82c01a\":0,\"col-763cd7e2-7627-4d8e-9fb7-9e85d0c8845b\":0}}"}} * " */ - void processMetadataFile(const String & key, std::set & result) const + + /// Read metadata file and fill `file_schema`, `file_parition_columns`, `result`. + /// `result` is a list of data files. + /// `file_schema` is a common schema for all files. + /// Schema evolution is not supported, so we check that all files have the same schema. + /// `file_partiion_columns` is information about partition columns of data files. + void processMetadataFile( + const String & metadata_file_path, + NamesAndTypesList & file_schema, + DataLakePartitionColumns & file_partition_columns, + std::set & result) { auto read_settings = context->getReadSettings(); - auto buf = object_storage->readObject(StoredObject(key), read_settings); + auto buf = object_storage->readObject(StoredObject(metadata_file_path), read_settings); char c; while (!buf->eof()) @@ -157,20 +203,250 @@ struct DeltaLakeMetadata::Impl if (json_str.empty()) continue; - const JSON json(json_str); - if (json.has("add")) + Poco::JSON::Parser parser; + Poco::Dynamic::Var json = parser.parse(json_str); + Poco::JSON::Object::Ptr object = json.extract(); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + object->stringify(oss); + LOG_TEST(log, "Metadata: {}", oss.str()); + + if (object->has("metaData")) { - const auto path = json["add"]["path"].getString(); - result.insert(std::filesystem::path(configuration->getPath()) / path); + const auto metadata_object = object->get("metaData").extract(); + const auto schema_object = metadata_object->getValue("schemaString"); + + Poco::JSON::Parser p; + Poco::Dynamic::Var fields_json = parser.parse(schema_object); + const Poco::JSON::Object::Ptr & fields_object = fields_json.extract(); + + auto current_schema = parseMetadata(fields_object); + if (file_schema.empty()) + { + file_schema = current_schema; + } + else if (file_schema != current_schema) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from files with different schema is not possible " + "({} is different from {})", + file_schema.toString(), current_schema.toString()); + } } - else if (json.has("remove")) + + if (object->has("add")) { - const auto path = json["remove"]["path"].getString(); - result.erase(std::filesystem::path(configuration->getPath()) / path); + auto add_object = object->get("add").extract(); + auto path = add_object->getValue("path"); + result.insert(fs::path(configuration->getPath()) / path); + + auto filename = fs::path(path).filename().string(); + auto it = file_partition_columns.find(filename); + if (it == file_partition_columns.end()) + { + if (add_object->has("partitionValues")) + { + auto partition_values = add_object->get("partitionValues").extract(); + if (partition_values->size()) + { + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & partition_name : partition_values->getNames()) + { + const auto value = partition_values->getValue(partition_name); + auto name_and_type = file_schema.tryGetByName(partition_name); + if (!name_and_type) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "No such column in schema: {} (schema: {})", + partition_name, file_schema.toNamesAndTypesDescription()); + } + + auto field = getFieldValue(value, name_and_type->type); + current_partition_columns.emplace_back(*name_and_type, field); + + LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); + } + } + } + } + } + else if (object->has("remove")) + { + auto path = object->get("remove").extract()->getValue("path"); + result.erase(fs::path(configuration->getPath()) / path); } } } + NamesAndTypesList parseMetadata(const Poco::JSON::Object::Ptr & metadata_json) + { + NamesAndTypesList schema; + const auto fields = metadata_json->get("fields").extract(); + for (size_t i = 0; i < fields->size(); ++i) + { + const auto field = fields->getObject(static_cast(i)); + auto column_name = field->getValue("name"); + auto type = field->getValue("type"); + auto is_nullable = field->getValue("nullable"); + + std::string physical_name; + auto schema_metadata_object = field->get("metadata").extract(); + if (schema_metadata_object->has("delta.columnMapping.physicalName")) + physical_name = schema_metadata_object->getValue("delta.columnMapping.physicalName"); + else + physical_name = column_name; + + LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}", + column_name, type, is_nullable, physical_name); + + schema.push_back({physical_name, getFieldType(field, "type", is_nullable)}); + } + return schema; + } + + DataTypePtr getFieldType(const Poco::JSON::Object::Ptr & field, const String & type_key, bool is_nullable) + { + if (field->isObject(type_key)) + return getComplexTypeFromObject(field->getObject(type_key)); + + auto type = field->get(type_key); + if (type.isString()) + { + const String & type_name = type.extract(); + auto data_type = getSimpleTypeByName(type_name); + return is_nullable ? makeNullable(data_type) : data_type; + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected 'type' field: {}", type.toString()); + } + + Field getFieldValue(const String & value, DataTypePtr data_type) + { + DataTypePtr check_type; + if (data_type->isNullable()) + check_type = static_cast(data_type.get())->getNestedType(); + else + check_type = data_type; + + WhichDataType which(check_type->getTypeId()); + if (which.isStringOrFixedString()) + return value; + else if (which.isInt8()) + return parse(value); + else if (which.isUInt8()) + return parse(value); + else if (which.isInt16()) + return parse(value); + else if (which.isUInt16()) + return parse(value); + else if (which.isInt32()) + return parse(value); + else if (which.isUInt32()) + return parse(value); + else if (which.isInt64()) + return parse(value); + else if (which.isUInt64()) + return parse(value); + else if (which.isFloat32()) + return parse(value); + else if (which.isFloat64()) + return parse(value); + else if (which.isDate()) + return UInt16{LocalDate{std::string(value)}.getDayNum()}; + else if (which.isDate32()) + return Int32{LocalDate{std::string(value)}.getExtenedDayNum()}; + else if (which.isDateTime64()) + { + ReadBufferFromString in(value); + DateTime64 time = 0; + readDateTime64Text(time, 6, in, assert_cast(data_type.get())->getTimeZone()); + return time; + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type for {}", check_type->getColumnType()); + } + + DataTypePtr getSimpleTypeByName(const String & type_name) + { + /// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#primitive-types + + if (type_name == "string" || type_name == "binary") + return std::make_shared(); + if (type_name == "long") + return std::make_shared(); + if (type_name == "integer") + return std::make_shared(); + if (type_name == "short") + return std::make_shared(); + if (type_name == "byte") + return std::make_shared(); + if (type_name == "float") + return std::make_shared(); + if (type_name == "double") + return std::make_shared(); + if (type_name == "boolean") + return DataTypeFactory::instance().get("Bool"); + if (type_name == "date") + return std::make_shared(); + if (type_name == "timestamp") + return std::make_shared(6); + if (type_name.starts_with("decimal(") && type_name.ends_with(')')) + { + ReadBufferFromString buf(std::string_view(type_name.begin() + 8, type_name.end() - 1)); + size_t precision; + size_t scale; + readIntText(precision, buf); + skipWhitespaceIfAny(buf); + assertChar(',', buf); + skipWhitespaceIfAny(buf); + tryReadIntText(scale, buf); + return createDecimal(precision, scale); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type: {}", type_name); + } + + DataTypePtr getComplexTypeFromObject(const Poco::JSON::Object::Ptr & type) + { + String type_name = type->getValue("type"); + + if (type_name == "struct") + { + DataTypes element_types; + Names element_names; + auto fields = type->get("fields").extract(); + element_types.reserve(fields->size()); + element_names.reserve(fields->size()); + for (size_t i = 0; i != fields->size(); ++i) + { + auto field = fields->getObject(static_cast(i)); + element_names.push_back(field->getValue("name")); + auto required = field->getValue("required"); + element_types.push_back(getFieldType(field, "type", required)); + } + + return std::make_shared(element_types, element_names); + } + + if (type_name == "array") + { + bool is_nullable = type->getValue("containsNull"); + auto element_type = getFieldType(type, "elementType", is_nullable); + return std::make_shared(element_type); + } + + if (type_name == "map") + { + bool is_nullable = type->getValue("containsNull"); + auto key_type = getFieldType(type, "keyType", /* is_nullable */false); + auto value_type = getFieldType(type, "valueType", is_nullable); + return std::make_shared(key_type, value_type); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type: {}", type_name); + } + /** * Checkpoints in delta-lake are created each 10 commits by default. * Latest checkpoint is written in _last_checkpoint file: _delta_log/_last_checkpoint @@ -242,7 +518,10 @@ struct DeltaLakeMetadata::Impl throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arrow error: {}", _s.ToString()); \ } while (false) - size_t getCheckpointIfExists(std::set & result) + size_t getCheckpointIfExists( + std::set & result, + NamesAndTypesList & file_schema, + DataLakePartitionColumns & file_partition_columns) { const auto version = readLastCheckpointIfExists(); if (!version) @@ -263,7 +542,8 @@ struct DeltaLakeMetadata::Impl auto columns = ParquetSchemaReader(*buf, format_settings).readSchema(); /// Read only columns that we need. - columns.filterColumns(NameSet{"add", "remove"}); + auto filter_column_names = NameSet{"add", "metaData"}; + columns.filterColumns(filter_column_names); Block header; for (const auto & column : columns) header.insert({column.type->createColumn(), column.type, column.name}); @@ -277,9 +557,6 @@ struct DeltaLakeMetadata::Impl ArrowMemoryPool::instance(), &reader)); - std::shared_ptr schema; - THROW_ARROW_NOT_OK(reader->GetSchema(&schema)); - ArrowColumnToCHColumn column_reader( header, "Parquet", format_settings.parquet.allow_missing_columns, @@ -290,29 +567,85 @@ struct DeltaLakeMetadata::Impl std::shared_ptr table; THROW_ARROW_NOT_OK(reader->ReadTable(&table)); - Chunk res = column_reader.arrowTableToCHChunk(table, reader->parquet_reader()->metadata()->num_rows()); - const auto & res_columns = res.getColumns(); + Chunk chunk = column_reader.arrowTableToCHChunk(table, reader->parquet_reader()->metadata()->num_rows()); + auto res_block = header.cloneWithColumns(chunk.detachColumns()); + res_block = Nested::flatten(res_block); - if (res_columns.size() != 2) - { - throw Exception( - ErrorCodes::INCORRECT_DATA, - "Unexpected number of columns: {} (having: {}, expected: {})", - res_columns.size(), res.dumpStructure(), header.dumpStructure()); - } + const auto * nullable_path_column = assert_cast(res_block.getByName("add.path").column.get()); + const auto & path_column = assert_cast(nullable_path_column->getNestedColumn()); + + const auto * nullable_schema_column = assert_cast(res_block.getByName("metaData.schemaString").column.get()); + const auto & schema_column = assert_cast(nullable_schema_column->getNestedColumn()); + + auto partition_values_column_raw = res_block.getByName("add.partitionValues").column; + const auto & partition_values_column = assert_cast(*partition_values_column_raw); - const auto * tuple_column = assert_cast(res_columns[0].get()); - const auto & nullable_column = assert_cast(tuple_column->getColumn(0)); - const auto & path_column = assert_cast(nullable_column.getNestedColumn()); for (size_t i = 0; i < path_column.size(); ++i) { - const auto filename = String(path_column.getDataAt(i)); - if (filename.empty()) + const auto metadata = String(schema_column.getDataAt(i)); + if (!metadata.empty()) + { + Poco::JSON::Parser parser; + Poco::Dynamic::Var json = parser.parse(metadata); + const Poco::JSON::Object::Ptr & object = json.extract(); + + auto current_schema = parseMetadata(object); + if (file_schema.empty()) + { + file_schema = current_schema; + LOG_TEST(log, "Processed schema from checkpoint: {}", file_schema.toString()); + } + else if (file_schema != current_schema) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from files with different schema is not possible " + "({} is different from {})", + file_schema.toString(), current_schema.toString()); + } + } + } + + for (size_t i = 0; i < path_column.size(); ++i) + { + const auto path = String(path_column.getDataAt(i)); + if (path.empty()) continue; - LOG_TEST(log, "Adding {}", filename); - const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / filename); + + auto filename = fs::path(path).filename().string(); + auto it = file_partition_columns.find(filename); + if (it == file_partition_columns.end()) + { + Field map; + partition_values_column.get(i, map); + auto partition_values_map = map.safeGet(); + if (!partition_values_map.empty()) + { + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & map_value : partition_values_map) + { + const auto tuple = map_value.safeGet(); + const auto partition_name = tuple[0].safeGet(); + auto name_and_type = file_schema.tryGetByName(partition_name); + if (!name_and_type) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "No such column in schema: {} (schema: {})", + partition_name, file_schema.toString()); + } + const auto value = tuple[1].safeGet(); + auto field = getFieldValue(value, name_and_type->type); + current_partition_columns.emplace_back(std::move(name_and_type.value()), std::move(field)); + + LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); + } + } + } + + LOG_TEST(log, "Adding {}", path); + const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / path); if (!inserted) - throw Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", filename); + throw Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", path); } return version; @@ -325,18 +658,15 @@ DeltaLakeMetadata::DeltaLakeMetadata( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, ContextPtr context_) - : impl(std::make_unique(object_storage_, configuration_, context_)) { -} + auto impl = DeltaLakeMetadataImpl(object_storage_, configuration_, context_); + auto result = impl.processMetadataFiles(); + data_files = result.data_files; + schema = result.schema; + partition_columns = result.partition_columns; -Strings DeltaLakeMetadata::getDataFiles() const -{ - if (!data_files.empty()) - return data_files; - - auto result = impl->processMetadataFiles(); - data_files = Strings(result.begin(), result.end()); - return data_files; + LOG_TRACE(impl.log, "Found {} data files, {} partition files, schema: {}", + data_files.size(), partition_columns.size(), schema.toString()); } } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h index e527721b29e..a479a3dd293 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -20,9 +20,13 @@ public: ConfigurationPtr configuration_, ContextPtr context_); - Strings getDataFiles() const override; + Strings getDataFiles() const override { return data_files; } - NamesAndTypesList getTableSchema() const override { return {}; } + NamesAndTypesList getTableSchema() const override { return schema; } + + const DataLakePartitionColumns & getPartitionColumns() const override { return partition_columns; } + + const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } bool operator ==(const IDataLakeMetadata & other) const override { @@ -41,9 +45,10 @@ public: } private: - struct Impl; - const std::shared_ptr impl; mutable Strings data_files; + NamesAndTypesList schema; + std::unordered_map column_name_to_physical_name; + DataLakePartitionColumns partition_columns; }; } diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h index 3ab274b1fbf..b060b1b0d39 100644 --- a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h @@ -26,6 +26,10 @@ public: NamesAndTypesList getTableSchema() const override { return {}; } + const DataLakePartitionColumns & getPartitionColumns() const override { return partition_columns; } + + const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } + bool operator ==(const IDataLakeMetadata & other) const override { const auto * hudi_metadata = dynamic_cast(&other); @@ -46,6 +50,8 @@ private: const ObjectStoragePtr object_storage; const ConfigurationPtr configuration; mutable Strings data_files; + std::unordered_map column_name_to_physical_name; + DataLakePartitionColumns partition_columns; Strings getDataFilesImpl() const; }; diff --git a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h index a2bd5adb947..2954d50db91 100644 --- a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h @@ -2,6 +2,7 @@ #include #include #include +#include "PartitionColumns.h" namespace DB { @@ -13,6 +14,8 @@ public: virtual Strings getDataFiles() const = 0; virtual NamesAndTypesList getTableSchema() const = 0; virtual bool operator==(const IDataLakeMetadata & other) const = 0; + virtual const DataLakePartitionColumns & getPartitionColumns() const = 0; + virtual const std::unordered_map & getColumnNameToPhysicalNameMapping() const = 0; }; using DataLakeMetadataPtr = std::unique_ptr; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 83865c47eb8..c8603fccb86 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -41,6 +41,7 @@ public: auto object_storage = base_configuration->createObjectStorage(context, /* is_readonly */true); DataLakeMetadataPtr metadata; NamesAndTypesList schema_from_metadata; + const bool use_schema_from_metadata = columns_.empty(); if (base_configuration->format == "auto") base_configuration->format = "Parquet"; @@ -50,8 +51,9 @@ public: try { metadata = DataLakeMetadata::create(object_storage, base_configuration, context); - schema_from_metadata = metadata->getTableSchema(); configuration->setPaths(metadata->getDataFiles()); + if (use_schema_from_metadata) + schema_from_metadata = metadata->getTableSchema(); } catch (...) { @@ -66,7 +68,7 @@ public: return std::make_shared>( base_configuration, std::move(metadata), configuration, object_storage, context, table_id_, - columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, + use_schema_from_metadata ? ColumnsDescription(schema_from_metadata) : columns_, constraints_, comment_, format_settings_); } @@ -81,7 +83,7 @@ public: auto metadata = DataLakeMetadata::create(object_storage_, base_configuration, local_context); auto schema_from_metadata = metadata->getTableSchema(); - if (schema_from_metadata != NamesAndTypesList{}) + if (!schema_from_metadata.empty()) { return ColumnsDescription(std::move(schema_from_metadata)); } @@ -99,13 +101,13 @@ public: Storage::updateConfiguration(local_context); auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); - if (current_metadata && *current_metadata == *new_metadata) return; current_metadata = std::move(new_metadata); auto updated_configuration = base_configuration->clone(); updated_configuration->setPaths(current_metadata->getDataFiles()); + updated_configuration->setPartitionColumns(current_metadata->getPartitionColumns()); Storage::configuration = updated_configuration; } @@ -123,11 +125,42 @@ public: { base_configuration->format = Storage::configuration->format; } + + if (current_metadata) + { + const auto & columns = current_metadata->getPartitionColumns(); + base_configuration->setPartitionColumns(columns); + Storage::configuration->setPartitionColumns(columns); + } } private: ConfigurationPtr base_configuration; DataLakeMetadataPtr current_metadata; + + ReadFromFormatInfo prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr local_context) override + { + auto info = DB::prepareReadingFromFormat(requested_columns, storage_snapshot, supports_subset_of_columns); + if (!current_metadata) + { + Storage::updateConfiguration(local_context); + current_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); + } + auto column_mapping = current_metadata->getColumnNameToPhysicalNameMapping(); + if (!column_mapping.empty()) + { + for (const auto & [column_name, physical_name] : column_mapping) + { + auto & column = info.format_header.getByName(column_name); + column.name = physical_name; + } + } + return info; + } }; using StorageIceberg = IStorageDataLake; diff --git a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp index 0484f86542c..6d18b13df01 100644 --- a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp @@ -3,7 +3,7 @@ #if USE_AWS_S3 && USE_AVRO #include - +#include #include #include #include diff --git a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h index 06dbd373bf9..9476ac6e7d9 100644 --- a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h @@ -82,6 +82,10 @@ public: /// Get table schema parsed from metadata. NamesAndTypesList getTableSchema() const override { return schema; } + const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } + + const DataLakePartitionColumns & getPartitionColumns() const override { return partition_columns; } + bool operator ==(const IDataLakeMetadata & other) const override { const auto * iceberg_metadata = dynamic_cast(&other); @@ -104,6 +108,8 @@ private: Int32 current_schema_id; NamesAndTypesList schema; mutable Strings data_files; + std::unordered_map column_name_to_physical_name; + DataLakePartitionColumns partition_columns; LoggerPtr log; }; diff --git a/src/Storages/ObjectStorage/DataLakes/PartitionColumns.h b/src/Storages/ObjectStorage/DataLakes/PartitionColumns.h new file mode 100644 index 00000000000..eb605559145 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/PartitionColumns.h @@ -0,0 +1,19 @@ +#pragma once +#include +#include + +namespace DB +{ + +struct DataLakePartitionColumn +{ + NameAndTypePair name_and_type; + Field value; + + bool operator ==(const DataLakePartitionColumn & other) const = default; +}; + +/// Data file -> partition columns +using DataLakePartitionColumns = std::unordered_map>; + +} diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 155f51adf61..e8071be6f02 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -2,6 +2,7 @@ #if USE_HDFS #include +#include #include #include #include diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 78cdc442f64..d739f7a28c9 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index b33d55105e9..094ca069e7a 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -1,6 +1,7 @@ #include #if USE_AWS_S3 +#include #include #include #include diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 90a97a9ea62..fe675a17d54 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -203,6 +204,15 @@ private: }; } +ReadFromFormatInfo StorageObjectStorage::prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr /* local_context */) +{ + return DB::prepareReadingFromFormat(requested_columns, storage_snapshot, supports_subset_of_columns); +} + void StorageObjectStorage::read( QueryPlan & query_plan, const Names & column_names, @@ -222,7 +232,7 @@ void StorageObjectStorage::read( } const auto read_from_format_info = prepareReadingFromFormat( - column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); + column_names, storage_snapshot, supportsSubsetOfColumns(local_context), local_context); const bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; @@ -451,6 +461,7 @@ StorageObjectStorage::Configuration::Configuration(const Configuration & other) format = other.format; compression_method = other.compression_method; structure = other.structure; + partition_columns = other.partition_columns; } bool StorageObjectStorage::Configuration::withPartitionWildcard() const diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index cf8ec113653..818ce055c77 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -1,10 +1,12 @@ #pragma once #include #include +#include #include #include #include #include +#include namespace DB { @@ -117,6 +119,12 @@ public: protected: virtual void updateConfiguration(ContextPtr local_context); + virtual ReadFromFormatInfo prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr local_context); + static std::unique_ptr createReadBufferIterator( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, @@ -188,6 +196,9 @@ public: virtual ConfigurationPtr clone() = 0; virtual bool isStaticConfiguration() const { return true; } + void setPartitionColumns(const DataLakePartitionColumns & columns) { partition_columns = columns; } + const DataLakePartitionColumns & getPartitionColumns() const { return partition_columns; } + String format = "auto"; String compression_method = "auto"; String structure = "auto"; @@ -199,6 +210,7 @@ protected: void assertInitialized() const; bool initialized = false; + DataLakePartitionColumns partition_columns; }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index 69fec2b3c77..108aa109616 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -24,12 +24,6 @@ public: std::string getName() const override; - bool supportsSubcolumns() const override { return true; } - - bool supportsDynamicSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - RemoteQueryExecutor::Extension getTaskIteratorExtension( const ActionsDAG::Node * predicate, const ContextPtr & context) const override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index f2f6eac333c..000e73c70ce 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace DB diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index aef783fc3c4..086482e330a 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace fs = std::filesystem; @@ -65,7 +66,6 @@ StorageObjectStorageSource::StorageObjectStorageSource( CurrentMetrics::StorageObjectStorageThreadsActive, CurrentMetrics::StorageObjectStorageThreadsScheduled, 1/* max_threads */)) - , columns_desc(info.columns_description) , file_iterator(file_iterator_) , schema_cache(StorageObjectStorage::getSchemaCache(context_, configuration->getTypeName())) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) @@ -156,20 +156,20 @@ std::shared_ptr StorageObjectStorageSourc return iterator; } -void StorageObjectStorageSource::lazyInitialize(size_t processor) +void StorageObjectStorageSource::lazyInitialize() { if (initialized) return; - reader = createReader(processor); + reader = createReader(); if (reader) - reader_future = createReaderAsync(processor); + reader_future = createReaderAsync(); initialized = true; } Chunk StorageObjectStorageSource::generate() { - lazyInitialize(0); + lazyInitialize(); while (true) { @@ -196,13 +196,38 @@ Chunk StorageObjectStorageSource::generate() const auto & filename = object_info->getFileName(); chassert(object_info->metadata); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( - chunk, read_from_format_info.requested_virtual_columns, + chunk, + read_from_format_info.requested_virtual_columns, + {.path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), + .size = object_info->isArchive() ? object_info->fileSizeInArchive() : object_info->metadata->size_bytes, + .filename = &filename, + .last_modified = object_info->metadata->last_modified}); + + const auto & partition_columns = configuration->getPartitionColumns(); + if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) + { + auto partition_values = partition_columns.find(filename); + if (partition_values != partition_columns.end()) { - .path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), - .size = object_info->metadata->size_bytes, - .filename = &filename, - .last_modified = object_info->metadata->last_modified - }); + for (const auto & [name_and_type, value] : partition_values->second) + { + if (!read_from_format_info.source_header.has(name_and_type.name)) + continue; + + const auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); + auto partition_column = name_and_type.type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); + + /// This column is filled with default value now, remove it. + chunk.erase(column_pos); + + /// Add correct values. + if (column_pos < chunk.getNumColumns()) + chunk.addColumn(column_pos, std::move(partition_column)); + else + chunk.addColumn(std::move(partition_column)); + } + } + } return chunk; } @@ -236,27 +261,30 @@ void StorageObjectStorageSource::addNumRowsToCache(const ObjectInfo & object_inf schema_cache.addNumRows(cache_key, num_rows); } -std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfo & object_info) +StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader() { - const auto cache_key = getKeyForSchemaCache( - getUniqueStoragePathIdentifier(*configuration, object_info), - configuration->format, - format_settings, - getContext()); - - auto get_last_mod_time = [&]() -> std::optional - { - return object_info.metadata - ? std::optional(object_info.metadata->last_modified.epochTime()) - : std::nullopt; - }; - return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); + return createReader( + 0, file_iterator, configuration, object_storage, read_from_format_info, format_settings, + key_condition, getContext(), &schema_cache, log, max_block_size, max_parsing_threads, need_only_count); } -StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader(size_t processor) +StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader( + size_t processor, + const std::shared_ptr & file_iterator, + const ConfigurationPtr & configuration, + const ObjectStoragePtr & object_storage, + const ReadFromFormatInfo & read_from_format_info, + const std::optional & format_settings, + const std::shared_ptr & key_condition_, + const ContextPtr & context_, + SchemaCache * schema_cache, + const LoggerPtr & log, + size_t max_block_size, + size_t max_parsing_threads, + bool need_only_count) { ObjectInfoPtr object_info; - auto query_settings = configuration->getQuerySettings(getContext()); + auto query_settings = configuration->getQuerySettings(context_); do { @@ -277,9 +305,29 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade std::shared_ptr source; std::unique_ptr read_buf; + auto try_get_num_rows_from_cache = [&]() -> std::optional + { + if (!schema_cache) + return std::nullopt; + + const auto cache_key = getKeyForSchemaCache( + getUniqueStoragePathIdentifier(*configuration, *object_info), + configuration->format, + format_settings, + context_); + + auto get_last_mod_time = [&]() -> std::optional + { + return object_info->metadata + ? std::optional(object_info->metadata->last_modified.epochTime()) + : std::nullopt; + }; + return schema_cache->tryGetNumRows(cache_key, get_last_mod_time); + }; + std::optional num_rows_from_cache = need_only_count - && getContext()->getSettingsRef().use_cache_for_count_from_files - ? tryGetNumRowsFromCache(*object_info) + && context_->getSettingsRef().use_cache_for_count_from_files + ? try_get_num_rows_from_cache() : std::nullopt; if (num_rows_from_cache) @@ -304,14 +352,14 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade else { compression_method = chooseCompressionMethod(object_info->getFileName(), configuration->compression_method); - read_buf = createReadBuffer(*object_info); + read_buf = createReadBuffer(*object_info, object_storage, context_, log); } auto input_format = FormatFactory::instance().getInput( configuration->format, *read_buf, read_from_format_info.format_header, - getContext(), + context_, max_block_size, format_settings, need_only_count ? 1 : max_parsing_threads, @@ -320,20 +368,20 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade compression_method, need_only_count); - if (key_condition) - input_format->setKeyCondition(key_condition); + if (key_condition_) + input_format->setKeyCondition(key_condition_); if (need_only_count) input_format->needOnlyCount(); builder.init(Pipe(input_format)); - if (columns_desc.hasDefaults()) + if (read_from_format_info.columns_description.hasDefaults()) { builder.addSimpleTransform( [&](const Block & header) { - return std::make_shared(header, columns_desc, *input_format, getContext()); + return std::make_shared(header, read_from_format_info.columns_description, *input_format, context_); }); } @@ -356,21 +404,25 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade object_info, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)); } -std::future StorageObjectStorageSource::createReaderAsync(size_t processor) +std::future StorageObjectStorageSource::createReaderAsync() { - return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); + return create_reader_scheduler([=, this] { return createReader(); }, Priority{}); } -std::unique_ptr StorageObjectStorageSource::createReadBuffer(const ObjectInfo & object_info) +std::unique_ptr StorageObjectStorageSource::createReadBuffer( + const ObjectInfo & object_info, + const ObjectStoragePtr & object_storage, + const ContextPtr & context_, + const LoggerPtr & log) { const auto & object_size = object_info.metadata->size_bytes; - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); + auto read_settings = context_->getReadSettings().adjustBufferSize(object_size); read_settings.enable_filesystem_cache = false; /// FIXME: Changing this setting to default value breaks something around parquet reading read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size; - const bool object_too_small = object_size <= 2 * getContext()->getSettings().max_download_buffer_size; + const bool object_too_small = object_size <= 2 * context_->getSettingsRef().max_download_buffer_size; const bool use_prefetch = object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; read_settings.remote_fs_method = use_prefetch ? RemoteFSReadMethod::threadpool : RemoteFSReadMethod::read; /// User's object may change, don't cache it. @@ -690,10 +742,9 @@ static IArchiveReader::NameFilter createArchivePathFilter(const std::string & ar StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive::ObjectInfoInArchive( ObjectInfoPtr archive_object_, const std::string & path_in_archive_, - std::shared_ptr archive_reader_) - : archive_object(archive_object_) - , path_in_archive(path_in_archive_) - , archive_reader(archive_reader_) + std::shared_ptr archive_reader_, + IArchiveReader::FileInfo && file_info_) + : archive_object(archive_object_), path_in_archive(path_in_archive_), archive_reader(archive_reader_), file_info(file_info_) { } @@ -732,6 +783,7 @@ StorageObjectStorageSource::ObjectInfoPtr StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) { std::unique_lock lock{next_mutex}; + IArchiveReader::FileInfo current_file_info{}; while (true) { if (filter) @@ -756,6 +808,8 @@ StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) path_in_archive = file_enumerator->getFileName(); if (!filter(path_in_archive)) continue; + else + current_file_info = file_enumerator->getFileInfo(); } else { @@ -769,15 +823,19 @@ StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) archive_reader = createArchiveReader(archive_object); if (!archive_reader->fileExists(path_in_archive)) continue; + else + current_file_info = archive_reader->getFileInfo(path_in_archive); } - - auto object_in_archive = std::make_shared(archive_object, path_in_archive, archive_reader); - - if (read_keys != nullptr) - read_keys->push_back(object_in_archive); - - return object_in_archive; + break; } + + auto object_in_archive + = std::make_shared(archive_object, path_in_archive, archive_reader, std::move(current_file_info)); + + if (read_keys != nullptr) + read_keys->push_back(object_in_archive); + + return object_in_archive; } size_t StorageObjectStorageSource::ArchiveIterator::estimatedKeysCount() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index d93097d2636..271b38fa75c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -75,7 +76,6 @@ protected: const ReadFromFormatInfo read_from_format_info; const std::shared_ptr create_reader_pool; - ColumnsDescription columns_desc; std::shared_ptr file_iterator; SchemaCache & schema_cache; bool initialized = false; @@ -116,13 +116,32 @@ protected: std::future reader_future; /// Recreate ReadBuffer and Pipeline for each file. - ReaderHolder createReader(size_t processor = 0); - std::future createReaderAsync(size_t processor = 0); - std::unique_ptr createReadBuffer(const ObjectInfo & object_info); + static ReaderHolder createReader( + size_t processor, + const std::shared_ptr & file_iterator, + const ConfigurationPtr & configuration, + const ObjectStoragePtr & object_storage, + const ReadFromFormatInfo & read_from_format_info, + const std::optional & format_settings, + const std::shared_ptr & key_condition_, + const ContextPtr & context_, + SchemaCache * schema_cache, + const LoggerPtr & log, + size_t max_block_size, + size_t max_parsing_threads, + bool need_only_count); + + ReaderHolder createReader(); + + std::future createReaderAsync(); + static std::unique_ptr createReadBuffer( + const ObjectInfo & object_info, + const ObjectStoragePtr & object_storage, + const ContextPtr & context_, + const LoggerPtr & log); void addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows); - std::optional tryGetNumRowsFromCache(const ObjectInfo & object_info); - void lazyInitialize(size_t processor); + void lazyInitialize(); }; class StorageObjectStorageSource::IIterator @@ -259,7 +278,8 @@ public: ObjectInfoInArchive( ObjectInfoPtr archive_object_, const std::string & path_in_archive_, - std::shared_ptr archive_reader_); + std::shared_ptr archive_reader_, + IArchiveReader::FileInfo && file_info_); std::string getFileName() const override { @@ -278,9 +298,12 @@ public: bool isArchive() const override { return true; } + size_t fileSizeInArchive() const override { return file_info.uncompressed_size; } + const ObjectInfoPtr archive_object; const std::string path_in_archive; const std::shared_ptr archive_reader; + const IArchiveReader::FileInfo file_info; }; private: diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index bf595b2f5d4..b5f4cf5bb54 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -1,9 +1,10 @@ +#include +#include #include #include #include #include #include -#include namespace DB { diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp index 52ee0c9f8ed..6fac519849d 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp @@ -62,6 +62,11 @@ void ObjectStorageQueueIFileMetadata::FileStatus::onFailed(const std::string & e last_exception = exception; } +void ObjectStorageQueueIFileMetadata::FileStatus::updateState(State state_) +{ + state = state_; +} + std::string ObjectStorageQueueIFileMetadata::FileStatus::getException() const { std::lock_guard lock(last_exception_mutex); @@ -224,9 +229,14 @@ bool ObjectStorageQueueIFileMetadata::setProcessing() auto [success, file_state] = setProcessingImpl(); if (success) + { file_status->onProcessing(); + } else + { + LOG_TEST(log, "Updating state of {} from {} to {}", path, file_status->state.load(), file_state); file_status->updateState(file_state); + } LOG_TEST(log, "File {} has state `{}`: will {}process (processing id version: {})", path, file_state, success ? "" : "not ", diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h index 652b4742389..9bab9e7f075 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h @@ -23,7 +23,7 @@ public: void onProcessing(); void onProcessed(); void onFailed(const std::string & exception); - void updateState(State state_) { state = state_; } + void updateState(State state_); std::string getException() const; @@ -34,7 +34,6 @@ public: std::atomic processing_start_time = 0; std::atomic processing_end_time = 0; std::atomic retries = 0; - ProfileEvents::Counters profile_counters; private: mutable std::mutex last_exception_mutex; diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h index 05060931b5a..e5fae047ac5 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h @@ -2,21 +2,19 @@ #include "config.h" #include -#include -#include #include -#include +#include #include #include #include #include +#include namespace fs = std::filesystem; namespace Poco { class Logger; } namespace DB { -struct ObjectStorageQueueSettings; class StorageObjectStorageQueue; struct ObjectStorageQueueTableMetadata; struct StorageInMemoryMetadata; diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 955e49bc2bf..4d921003e04 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -111,10 +111,12 @@ void ObjectStorageQueueSource::FileIterator::returnForRetry(Source::ObjectInfoPt if (metadata->useBucketsForProcessing()) { const auto bucket = metadata->getBucketForPath(object_info->relative_path); + std::lock_guard lock(mutex); listed_keys_cache[bucket].keys.emplace_front(object_info); } else { + std::lock_guard lock(mutex); objects_to_retry.push_back(object_info); } } @@ -357,41 +359,38 @@ ObjectStorageQueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t proc ObjectStorageQueueSource::ObjectStorageQueueSource( String name_, size_t processor_id_, - const Block & header_, - std::unique_ptr internal_source_, + std::shared_ptr file_iterator_, + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const ReadFromFormatInfo & read_from_format_info_, + const std::optional & format_settings_, + const ObjectStorageQueueSettings & queue_settings_, std::shared_ptr files_metadata_, - const ObjectStorageQueueAction & action_, - RemoveFileFunc remove_file_func_, - const NamesAndTypesList & requested_virtual_columns_, ContextPtr context_, + size_t max_block_size_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, std::shared_ptr system_queue_log_, const StorageID & storage_id_, LoggerPtr log_, - size_t max_processed_files_before_commit_, - size_t max_processed_rows_before_commit_, - size_t max_processed_bytes_before_commit_, - size_t max_processing_time_sec_before_commit_, bool commit_once_processed_) - : ISource(header_) + : ISource(read_from_format_info_.source_header) , WithContext(context_) , name(std::move(name_)) , processor_id(processor_id_) - , action(action_) + , file_iterator(file_iterator_) + , configuration(configuration_) + , object_storage(object_storage_) + , read_from_format_info(read_from_format_info_) + , format_settings(format_settings_) + , queue_settings(queue_settings_) , files_metadata(files_metadata_) - , internal_source(std::move(internal_source_)) - , requested_virtual_columns(requested_virtual_columns_) + , max_block_size(max_block_size_) , shutdown_called(shutdown_called_) , table_is_being_dropped(table_is_being_dropped_) , system_queue_log(system_queue_log_) , storage_id(storage_id_) - , max_processed_files_before_commit(max_processed_files_before_commit_) - , max_processed_rows_before_commit(max_processed_rows_before_commit_) - , max_processed_bytes_before_commit(max_processed_bytes_before_commit_) - , max_processing_time_sec_before_commit(max_processing_time_sec_before_commit_) , commit_once_processed(commit_once_processed_) - , remove_file_func(remove_file_func_) , log(log_) { } @@ -401,21 +400,6 @@ String ObjectStorageQueueSource::getName() const return name; } -void ObjectStorageQueueSource::lazyInitialize(size_t processor) -{ - if (initialized) - return; - - LOG_TEST(log, "Initializing a new reader"); - - internal_source->lazyInitialize(processor); - reader = std::move(internal_source->reader); - if (reader) - reader_future = std::move(internal_source->reader_future); - - initialized = true; -} - Chunk ObjectStorageQueueSource::generate() { Chunk chunk; @@ -440,19 +424,33 @@ Chunk ObjectStorageQueueSource::generate() Chunk ObjectStorageQueueSource::generateImpl() { - lazyInitialize(processor_id); - while (true) { if (!reader) { - LOG_TEST(log, "No reader"); - break; + if (shutdown_called) + { + LOG_TEST(log, "Shutdown called"); + break; + } + + const auto context = getContext(); + reader = StorageObjectStorageSource::createReader( + processor_id, file_iterator, configuration, object_storage, read_from_format_info, + format_settings, nullptr, context, nullptr, log, max_block_size, + context->getSettingsRef().max_parsing_threads.value, /* need_only_count */false); + + if (!reader) + { + LOG_TEST(log, "No reader"); + break; + } } const auto * object_info = dynamic_cast(reader.getObjectInfo().get()); auto file_metadata = object_info->file_metadata; auto file_status = file_metadata->getFileStatus(); + const auto & path = reader.getObjectInfo()->getPath(); if (isCancelled()) { @@ -475,8 +473,6 @@ Chunk ObjectStorageQueueSource::generateImpl() break; } - const auto & path = reader.getObjectInfo()->getPath(); - if (shutdown_called) { LOG_TEST(log, "Shutdown called"); @@ -509,10 +505,6 @@ Chunk ObjectStorageQueueSource::generateImpl() path, processed_rows_from_file); } - auto * prev_scope = CurrentThread::get().attachProfileCountersScope(&file_status->profile_counters); - SCOPE_EXIT({ CurrentThread::get().attachProfileCountersScope(prev_scope); }); - /// FIXME: if files are compressed, profile counters update does not work fully (object storage related counters are not saved). Why? - try { auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueuePullMicroseconds); @@ -528,7 +520,7 @@ Chunk ObjectStorageQueueSource::generateImpl() total_processed_bytes += chunk.bytes(); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( - chunk, requested_virtual_columns, + chunk, read_from_format_info.requested_virtual_columns, { .path = path, .size = reader.getObjectInfo()->metadata->size_bytes @@ -547,9 +539,6 @@ Chunk ObjectStorageQueueSource::generateImpl() if (processed_rows_from_file == 0) { - auto * file_iterator = dynamic_cast(internal_source->file_iterator.get()); - chassert(file_iterator); - if (file_status->retries < file_metadata->getMaxTries()) file_iterator->returnForRetry(reader.getObjectInfo()); @@ -564,11 +553,13 @@ Chunk ObjectStorageQueueSource::generateImpl() file_status->setProcessingEndTime(); file_status.reset(); + reader = {}; processed_rows_from_file = 0; processed_files.push_back(file_metadata); - if (processed_files.size() == max_processed_files_before_commit) + if (queue_settings.max_processed_files_before_commit + && processed_files.size() == queue_settings.max_processed_files_before_commit) { LOG_TRACE(log, "Number of max processed files before commit reached " "(rows: {}, bytes: {}, files: {})", @@ -576,68 +567,30 @@ Chunk ObjectStorageQueueSource::generateImpl() break; } - bool rows_or_bytes_or_time_limit_reached = false; - if (max_processed_rows_before_commit - && total_processed_rows == max_processed_rows_before_commit) + if (queue_settings.max_processed_rows_before_commit + && total_processed_rows == queue_settings.max_processed_rows_before_commit) { LOG_TRACE(log, "Number of max processed rows before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; + break; } - else if (max_processed_bytes_before_commit - && total_processed_bytes == max_processed_bytes_before_commit) + else if (queue_settings.max_processed_bytes_before_commit + && total_processed_bytes == queue_settings.max_processed_bytes_before_commit) { LOG_TRACE(log, "Number of max processed bytes before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; + break; } - else if (max_processing_time_sec_before_commit - && total_stopwatch.elapsedSeconds() >= max_processing_time_sec_before_commit) + else if (queue_settings.max_processing_time_sec_before_commit + && total_stopwatch.elapsedSeconds() >= queue_settings.max_processing_time_sec_before_commit) { LOG_TRACE(log, "Max processing time before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; - } - - if (rows_or_bytes_or_time_limit_reached) - { - if (!reader_future.valid()) - break; - - LOG_TRACE(log, "Rows or bytes limit reached, but we have one more file scheduled already, " - "will process it despite the limit"); - } - - if (shutdown_called) - { - LOG_TRACE(log, "Shutdown was called, stopping sync"); break; } - - chassert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - { - LOG_TEST(log, "Reader finished"); - break; - } - - file_status = files_metadata->getFileStatus(reader.getObjectInfo()->getPath()); - - if (!rows_or_bytes_or_time_limit_reached && processed_files.size() + 1 < max_processed_files_before_commit) - { - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - internal_source->create_reader_pool->wait(); - reader_future = internal_source->createReaderAsync(processor_id); - } } return {}; @@ -681,12 +634,11 @@ void ObjectStorageQueueSource::commit(bool success, const std::string & exceptio void ObjectStorageQueueSource::applyActionAfterProcessing(const String & path) { - switch (action) + switch (queue_settings.after_processing.value) { case ObjectStorageQueueAction::DELETE: { - assert(remove_file_func); - remove_file_func(path); + object_storage->removeObject(StoredObject(path)); break; } case ObjectStorageQueueAction::KEEP: @@ -714,7 +666,6 @@ void ObjectStorageQueueSource::appendLogElement( .file_name = filename, .rows_processed = processed_rows, .status = processed ? ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Processed : ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Failed, - .counters_snapshot = file_status_.profile_counters.getPartiallyAtomicSnapshot(), .processing_start_time = file_status_.processing_start_time, .processing_end_time = file_status_.processing_end_time, .exception = file_status_.getException(), diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h index ccd87e8a269..0f3d0ab2e92 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -21,7 +21,6 @@ class ObjectStorageQueueSource : public ISource, WithContext public: using Storage = StorageObjectStorage; using Source = StorageObjectStorageSource; - using RemoveFileFunc = std::function; using BucketHolderPtr = ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr; using BucketHolder = ObjectStorageQueueOrderedFileMetadata::BucketHolder; @@ -97,22 +96,20 @@ public: ObjectStorageQueueSource( String name_, size_t processor_id_, - const Block & header_, - std::unique_ptr internal_source_, + std::shared_ptr file_iterator_, + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const ReadFromFormatInfo & read_from_format_info_, + const std::optional & format_settings_, + const ObjectStorageQueueSettings & queue_settings_, std::shared_ptr files_metadata_, - const ObjectStorageQueueAction & action_, - RemoveFileFunc remove_file_func_, - const NamesAndTypesList & requested_virtual_columns_, ContextPtr context_, + size_t max_block_size_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, std::shared_ptr system_queue_log_, const StorageID & storage_id_, LoggerPtr log_, - size_t max_processed_files_before_commit_, - size_t max_processed_rows_before_commit_, - size_t max_processed_bytes_before_commit_, - size_t max_processing_time_sec_before_commit_, bool commit_once_processed_); static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); @@ -128,29 +125,27 @@ public: private: const String name; const size_t processor_id; - const ObjectStorageQueueAction action; + const std::shared_ptr file_iterator; + const ConfigurationPtr configuration; + const ObjectStoragePtr object_storage; + const ReadFromFormatInfo read_from_format_info; + const std::optional format_settings; + const ObjectStorageQueueSettings queue_settings; const std::shared_ptr files_metadata; - const std::shared_ptr internal_source; - const NamesAndTypesList requested_virtual_columns; + const size_t max_block_size; + const std::atomic & shutdown_called; const std::atomic & table_is_being_dropped; const std::shared_ptr system_queue_log; const StorageID storage_id; - const size_t max_processed_files_before_commit; - const size_t max_processed_rows_before_commit; - const size_t max_processed_bytes_before_commit; - const size_t max_processing_time_sec_before_commit; const bool commit_once_processed; - RemoveFileFunc remove_file_func; LoggerPtr log; std::vector processed_files; std::vector failed_during_read_files; Source::ReaderHolder reader; - std::future reader_future; - std::atomic initialized{false}; size_t processed_rows_from_file = 0; size_t total_processed_rows = 0; @@ -165,8 +160,6 @@ private: ObjectStorageQueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); - - void lazyInitialize(size_t processor); }; } diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 95265cde9ea..4388864434e 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -352,43 +352,14 @@ std::shared_ptr StorageObjectStorageQueue::createSourc ContextPtr local_context, bool commit_once_processed) { - auto internal_source = std::make_unique( - getName(), - object_storage, - configuration, - info, - format_settings, - local_context, - max_block_size, - file_iterator, - local_context->getSettingsRef().max_download_threads, - false); - - auto file_deleter = [=, this](const std::string & path) mutable - { - object_storage->removeObject(StoredObject(path)); - }; - return std::make_shared( - getName(), - processor_id, - info.source_header, - std::move(internal_source), - files_metadata, - queue_settings->after_processing, - file_deleter, - info.requested_virtual_columns, - local_context, - shutdown_called, - table_is_being_dropped, + getName(), processor_id, + file_iterator, configuration, object_storage, + info, format_settings, + *queue_settings, files_metadata, + local_context, max_block_size, shutdown_called, table_is_being_dropped, getQueueLog(object_storage, local_context, *queue_settings), - getStorageID(), - log, - queue_settings->max_processed_files_before_commit, - queue_settings->max_processed_rows_before_commit, - queue_settings->max_processed_bytes_before_commit, - queue_settings->max_processing_time_sec_before_commit, - commit_once_processed); + getStorageID(), log, commit_once_processed); } bool StorageObjectStorageQueue::hasDependencies(const StorageID & table_id) diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h index 758721674fe..fc459c45f74 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h @@ -79,6 +79,7 @@ private: void drop() override; bool supportsSubsetOfColumns(const ContextPtr & context_) const; bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } bool supportsDynamicSubcolumns() const override { return true; } std::shared_ptr createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate); diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index f686fbda664..b9edff39b82 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -592,7 +592,8 @@ void registerStorageMaterializedPostgreSQL(StorageFactory & factory) auto configuration = StoragePostgreSQL::getConfiguration(args.engine_args, args.getContext()); auto connection_info = postgres::formatConnectionString( configuration.database, configuration.host, configuration.port, - configuration.username, configuration.password); + configuration.username, configuration.password, + args.getContext()->getSettingsRef().postgresql_connection_attempt_timeout); bool has_settings = args.storage_def->settings; auto postgresql_replication_settings = std::make_unique(); diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.cpp b/src/Storages/ReadFinalForExternalReplicaStorage.cpp index e1d52eefc20..5070b4a17d8 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.cpp +++ b/src/Storages/ReadFinalForExternalReplicaStorage.cpp @@ -2,6 +2,7 @@ #if USE_MYSQL || USE_LIBPQXX +#include #include #include #include diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index e0ef967d491..9c8c4c2fe79 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 90792c59d38..b53f694bc5a 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index b9d3e071b6c..365915cfd68 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.cpp b/src/Storages/RocksDB/StorageSystemRocksDB.cpp index 5105b190fd9..744d43e1c77 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.cpp +++ b/src/Storages/RocksDB/StorageSystemRocksDB.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index bdf69b9be15..848aeb59dad 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -166,7 +166,7 @@ struct SelectQueryInfo /// It's guaranteed to be present in JOIN TREE of `query_tree` QueryTreeNodePtr table_expression; - bool analyzer_can_use_parallel_replicas_on_follower = false; + bool current_table_chosen_for_reading_with_parallel_replicas = false; /// Table expression modifiers for storage std::optional table_expression_modifiers; diff --git a/src/Storages/SetSettings.cpp b/src/Storages/SetSettings.cpp index baa3d231067..4e6dd6a0519 100644 --- a/src/Storages/SetSettings.cpp +++ b/src/Storages/SetSettings.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index fed0bd61c03..28e75c6d244 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -1,15 +1,14 @@ -#include -#include - #include #include -#include -#include +#include +#include #include #include #include #include #include +#include + namespace DB { @@ -20,32 +19,57 @@ namespace ErrorCodes extern const int INCORRECT_QUERY; } -/// Version / bitmask of statistics / data of statistics / enum StatisticsFileVersion : UInt16 { V0 = 0, }; -IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) {} +IStatistics::IStatistics(const SingleStatisticsDescription & stat_) + : stat(stat_) +{ +} ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_) - : stats_desc(stats_desc_), rows(0) + : stats_desc(stats_desc_) { } void ColumnStatistics::update(const ColumnPtr & column) { rows += column->size(); - for (const auto & iter : stats) - { - iter.second->update(column); - } + for (const auto & stat : stats) + stat.second->update(column); } +UInt64 IStatistics::estimateCardinality() const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cardinality estimation is not implemented for this type of statistics"); +} + +Float64 IStatistics::estimateEqual(Float64 /*val*/) const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Equality estimation is not implemented for this type of statistics"); +} + +Float64 IStatistics::estimateLess(Float64 /*val*/) const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics"); +} + +/// ------------------------------------- +/// Implementation of the estimation: +/// Note: Each statistics object supports certain types predicates natively, e.g. +/// - TDigest: '< X' (less-than predicates) +/// - Count-min sketches: '= X' (equal predicates) +/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality) +/// If multiple statistics objects are available per column, it is sometimes also possible to combine them in a clever way. +/// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics +/// object that supports it natively. + Float64 ColumnStatistics::estimateLess(Float64 val) const { if (stats.contains(StatisticsType::TDigest)) - return std::static_pointer_cast(stats.at(StatisticsType::TDigest))->estimateLess(val); + return stats.at(StatisticsType::TDigest)->estimateLess(val); return rows * ConditionSelectivityEstimator::default_normal_cond_factor; } @@ -58,14 +82,9 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const { if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) { - auto uniq_static = std::static_pointer_cast(stats.at(StatisticsType::Uniq)); - /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) - /// for every bucket. - if (uniq_static->getCardinality() < 2048) - { - auto tdigest_static = std::static_pointer_cast(stats.at(StatisticsType::TDigest)); - return tdigest_static->estimateEqual(val); - } + /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket. + if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048) + return stats.at(StatisticsType::TDigest)->estimateEqual(val); } if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold) return rows * ConditionSelectivityEstimator::default_normal_cond_factor; @@ -73,17 +92,22 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const return rows * ConditionSelectivityEstimator::default_good_cond_factor; } +/// ------------------------------------- + void ColumnStatistics::serialize(WriteBuffer & buf) { writeIntBinary(V0, buf); + UInt64 stat_types_mask = 0; for (const auto & [type, _]: stats) stat_types_mask |= 1 << UInt8(type); writeIntBinary(stat_types_mask, buf); - /// We write some basic statistics + + /// as the column row count is always useful, save it in any case writeIntBinary(rows, buf); - /// We write complex statistics - for (const auto & [type, stat_ptr]: stats) + + /// write the actual statistics object + for (const auto & [type, stat_ptr] : stats) stat_ptr->serialize(buf); } @@ -96,7 +120,9 @@ void ColumnStatistics::deserialize(ReadBuffer &buf) UInt64 stat_types_mask = 0; readIntBinary(stat_types_mask, buf); + readIntBinary(rows, buf); + for (auto it = stats.begin(); it != stats.end();) { if (!(stat_types_mask & 1 << UInt8(it->first))) @@ -136,15 +162,15 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va { if (!validators.emplace(stats_type, std::move(validator)).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticsFactory: the statistics validator type {} is not unique", stats_type); - } MergeTreeStatisticsFactory::MergeTreeStatisticsFactory() { - registerCreator(StatisticsType::TDigest, TDigestCreator); - registerCreator(StatisticsType::Uniq, UniqCreator); registerValidator(StatisticsType::TDigest, TDigestValidator); + registerCreator(StatisticsType::TDigest, TDigestCreator); + registerValidator(StatisticsType::Uniq, UniqValidator); + registerCreator(StatisticsType::Uniq, UniqCreator); } MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance() @@ -159,9 +185,7 @@ void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & st { auto it = validators.find(type); if (it == validators.end()) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown Statistic type '{}'", type); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistic type '{}'", type); it->second(desc, data_type); } } @@ -173,10 +197,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri { auto it = creators.find(type); if (it == creators.end()) - { - throw Exception(ErrorCodes::INCORRECT_QUERY, - "Unknown Statistic type '{}'. Available types: tdigest, uniq", type); - } + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq'", type); auto stat_ptr = (it->second)(desc, stats.data_type); column_stat->stats[type] = stat_ptr; } diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 2ab1337af02..d4364075d1c 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -1,19 +1,15 @@ #pragma once -#include -#include - #include -#include #include #include #include +#include namespace DB { -/// this is for user-defined statistic. constexpr auto STATS_FILE_PREFIX = "statistics_"; constexpr auto STATS_FILE_SUFFIX = ".stats"; @@ -25,14 +21,21 @@ class IStatistics { public: explicit IStatistics(const SingleStatisticsDescription & stat_); - virtual ~IStatistics() = default; - virtual void serialize(WriteBuffer & buf) = 0; + virtual void update(const ColumnPtr & column) = 0; + virtual void serialize(WriteBuffer & buf) = 0; virtual void deserialize(ReadBuffer & buf) = 0; - virtual void update(const ColumnPtr & column) = 0; + /// Estimate the cardinality of the column. + /// Throws if the statistics object is not able to do a meaningful estimation. + virtual UInt64 estimateCardinality() const; + + /// Per-value estimations. + /// Throws if the statistics object is not able to do a meaningful estimation. + virtual Float64 estimateEqual(Float64 val) const; /// cardinality of val in the column + virtual Float64 estimateLess(Float64 val) const; /// summarized cardinality of values < val in the column protected: SingleStatisticsDescription stat; @@ -43,11 +46,12 @@ using StatisticsPtr = std::shared_ptr; class ColumnStatistics { public: - explicit ColumnStatistics(const ColumnStatisticsDescription & stats_); + explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_); + void serialize(WriteBuffer & buf); void deserialize(ReadBuffer & buf); - String getFileName() const; + String getFileName() const; const String & columnName() const; UInt64 rowCount() const; @@ -55,17 +59,14 @@ public: void update(const ColumnPtr & column); Float64 estimateLess(Float64 val) const; - Float64 estimateGreater(Float64 val) const; - Float64 estimateEqual(Float64 val) const; private: - friend class MergeTreeStatisticsFactory; ColumnStatisticsDescription stats_desc; std::map stats; - UInt64 rows; /// the number of rows of the column + UInt64 rows = 0; /// the number of rows in the column }; class ColumnsDescription; @@ -79,25 +80,23 @@ public: void validate(const ColumnStatisticsDescription & stats, DataTypePtr data_type) const; + using Validator = std::function; using Creator = std::function; - using Validator = std::function; - ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const; - ColumnsStatistics getMany(const ColumnsDescription & columns) const; - void registerCreator(StatisticsType type, Creator creator); void registerValidator(StatisticsType type, Validator validator); + void registerCreator(StatisticsType type, Creator creator); protected: MergeTreeStatisticsFactory(); private: - using Creators = std::unordered_map; using Validators = std::unordered_map; - Creators creators; + using Creators = std::unordered_map; Validators validators; + Creators creators; }; } diff --git a/src/Storages/Statistics/StatisticsTDigest.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp new file mode 100644 index 00000000000..0747197370c --- /dev/null +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -0,0 +1,60 @@ +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_STATISTICS; +} + +StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_) + : IStatistics(stat_) +{ +} + +void StatisticsTDigest::update(const ColumnPtr & column) +{ + size_t rows = column->size(); + + for (size_t row = 0; row < rows; ++row) + { + /// TODO: support more types. + Float64 value = column->getFloat64(row); + t_digest.add(value, 1); + } +} + +void StatisticsTDigest::serialize(WriteBuffer & buf) +{ + t_digest.serialize(buf); +} + +void StatisticsTDigest::deserialize(ReadBuffer & buf) +{ + t_digest.deserialize(buf); +} + +Float64 StatisticsTDigest::estimateLess(Float64 val) const +{ + return t_digest.getCountLessThan(val); +} + +Float64 StatisticsTDigest::estimateEqual(Float64 val) const +{ + return t_digest.getCountEqual(val); +} + +void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +{ + data_type = removeNullable(data_type); + if (!data_type->isValueRepresentedByNumber()) + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName()); +} + +StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) +{ + return std::make_shared(stat); +} + +} diff --git a/src/Storages/Statistics/TDigestStatistics.h b/src/Storages/Statistics/StatisticsTDigest.h similarity index 63% rename from src/Storages/Statistics/TDigestStatistics.h rename to src/Storages/Statistics/StatisticsTDigest.h index 7c361b8751f..d3a3bf115ee 100644 --- a/src/Storages/Statistics/TDigestStatistics.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -6,27 +6,24 @@ namespace DB { - -/// TDigestStatistic is a kind of histogram. -class TDigestStatistics : public IStatistics +class StatisticsTDigest : public IStatistics { public: - explicit TDigestStatistics(const SingleStatisticsDescription & stat_); - - Float64 estimateLess(Float64 val) const; - - Float64 estimateEqual(Float64 val) const; - - void serialize(WriteBuffer & buf) override; - - void deserialize(ReadBuffer & buf) override; + explicit StatisticsTDigest(const SingleStatisticsDescription & stat_); void update(const ColumnPtr & column) override; + + void serialize(WriteBuffer & buf) override; + void deserialize(ReadBuffer & buf) override; + + Float64 estimateLess(Float64 val) const override; + Float64 estimateEqual(Float64 val) const override; + private: - QuantileTDigest data; + QuantileTDigest t_digest; }; -StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); } diff --git a/src/Storages/Statistics/UniqStatistics.cpp b/src/Storages/Statistics/StatisticsUniq.cpp similarity index 76% rename from src/Storages/Statistics/UniqStatistics.cpp rename to src/Storages/Statistics/StatisticsUniq.cpp index fc748e769ca..bf9a40ea8cb 100644 --- a/src/Storages/Statistics/UniqStatistics.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -10,7 +10,7 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -UniqStatistics::UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type) +StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type) : IStatistics(stat_) { arena = std::make_unique(); @@ -20,29 +20,12 @@ UniqStatistics::UniqStatistics(const SingleStatisticsDescription & stat_, const collector->create(data); } -UniqStatistics::~UniqStatistics() +StatisticsUniq::~StatisticsUniq() { collector->destroy(data); } -UInt64 UniqStatistics::getCardinality() -{ - auto column = DataTypeUInt64().createColumn(); - collector->insertResultInto(data, *column, nullptr); - return column->getUInt(0); -} - -void UniqStatistics::serialize(WriteBuffer & buf) -{ - collector->serialize(data, buf); -} - -void UniqStatistics::deserialize(ReadBuffer & buf) -{ - collector->deserialize(data, buf); -} - -void UniqStatistics::update(const ColumnPtr & column) +void StatisticsUniq::update(const ColumnPtr & column) { /// TODO(hanfei): For low cardinality, it's very slow to convert to full column. We can read the dictionary directly. /// Here we intend to avoid crash in CI. @@ -51,16 +34,33 @@ void UniqStatistics::update(const ColumnPtr & column) collector->addBatchSinglePlace(0, column->size(), data, &(raw_ptr), nullptr); } +void StatisticsUniq::serialize(WriteBuffer & buf) +{ + collector->serialize(data, buf); +} + +void StatisticsUniq::deserialize(ReadBuffer & buf) +{ + collector->deserialize(data, buf); +} + +UInt64 StatisticsUniq::estimateCardinality() const +{ + auto column = DataTypeUInt64().createColumn(); + collector->insertResultInto(data, *column, nullptr); + return column->getUInt(0); +} + void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); if (!data_type->isValueRepresentedByNumber()) - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' does not support type {}", data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); } StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) { - return std::make_shared(stat, data_type); + return std::make_shared(stat, data_type); } } diff --git a/src/Storages/Statistics/UniqStatistics.h b/src/Storages/Statistics/StatisticsUniq.h similarity index 77% rename from src/Storages/Statistics/UniqStatistics.h rename to src/Storages/Statistics/StatisticsUniq.h index 0d86a6e458a..5290585bd94 100644 --- a/src/Storages/Statistics/UniqStatistics.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -7,30 +7,27 @@ namespace DB { -class UniqStatistics : public IStatistics +class StatisticsUniq : public IStatistics { public: - UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); - - ~UniqStatistics() override; - - UInt64 getCardinality(); - - void serialize(WriteBuffer & buf) override; - - void deserialize(ReadBuffer & buf) override; + StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); + ~StatisticsUniq() override; void update(const ColumnPtr & column) override; -private: + void serialize(WriteBuffer & buf) override; + void deserialize(ReadBuffer & buf) override; + UInt64 estimateCardinality() const override; + +private: std::unique_ptr arena; AggregateFunctionPtr collector; AggregateDataPtr data; }; -StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); } diff --git a/src/Storages/Statistics/TDigestStatistics.cpp b/src/Storages/Statistics/TDigestStatistics.cpp deleted file mode 100644 index aa5662c979d..00000000000 --- a/src/Storages/Statistics/TDigestStatistics.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int ILLEGAL_STATISTICS; -} - -TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_): - IStatistics(stat_) -{ -} - -Float64 TDigestStatistics::estimateLess(Float64 val) const -{ - return data.getCountLessThan(val); -} - -Float64 TDigestStatistics::estimateEqual(Float64 val) const -{ - return data.getCountEqual(val); -} - -void TDigestStatistics::serialize(WriteBuffer & buf) -{ - data.serialize(buf); -} - -void TDigestStatistics::deserialize(ReadBuffer & buf) -{ - data.deserialize(buf); -} - -void TDigestStatistics::update(const ColumnPtr & column) -{ - size_t size = column->size(); - - for (size_t i = 0; i < size; ++i) - { - /// TODO: support more types. - Float64 value = column->getFloat64(i); - data.add(value, 1); - } -} - -StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) -{ - return std::make_shared(stat); -} - -void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) -{ - data_type = removeNullable(data_type); - if (!data_type->isValueRepresentedByNumber()) - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' does not support type {}", data_type->getName()); -} - -} diff --git a/src/Storages/Statistics/tests/gtest_stats.cpp b/src/Storages/Statistics/tests/gtest_stats.cpp index f94f310be56..c3c14632ba1 100644 --- a/src/Storages/Statistics/tests/gtest_stats.cpp +++ b/src/Storages/Statistics/tests/gtest_stats.cpp @@ -1,6 +1,6 @@ #include -#include +#include TEST(Statistics, TDigestLessThan) { diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index a3f6b6afc5d..c403287c43c 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -38,6 +38,7 @@ #include #include #include +#include namespace ProfileEvents diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 849fa5dbe0b..2cf3ced2904 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -426,7 +426,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( query_info.cluster = cluster; - if (!local_context->canUseParallelReplicasCustomKey(*cluster)) + if (!local_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) { if (nodes > 1 && settings.optimize_skip_unused_shards) { @@ -839,7 +839,9 @@ void StorageDistributed::read( SelectQueryInfo modified_query_info = query_info; - if (local_context->getSettingsRef().allow_experimental_analyzer) + const auto & settings = local_context->getSettingsRef(); + + if (settings.allow_experimental_analyzer) { StorageID remote_storage_id = StorageID::createEmpty(); if (!remote_table_function_ptr) @@ -864,7 +866,7 @@ void StorageDistributed::read( header = InterpreterSelectQuery(modified_query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } - if (!local_context->getSettingsRef().allow_experimental_analyzer) + if (!settings.allow_experimental_analyzer) { modified_query_info.query = ClusterProxy::rewriteSelectQuery( local_context, modified_query_info.query, @@ -874,7 +876,7 @@ void StorageDistributed::read( /// Return directly (with correct header) if no shard to query. if (modified_query_info.getCluster()->getShardsInfo().empty()) { - if (local_context->getSettingsRef().allow_experimental_analyzer) + if (settings.allow_experimental_analyzer) return; Pipe pipe(std::make_shared(header)); @@ -893,27 +895,8 @@ void StorageDistributed::read( storage_snapshot, processed_stage); - const auto & settings = local_context->getSettingsRef(); - - ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator; - if (local_context->canUseParallelReplicasCustomKey(*modified_query_info.getCluster())) - { - if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *local_context)) - { - additional_shard_filter_generator = - [my_custom_key_ast = std::move(custom_key_ast), - column_description = this->getInMemoryMetadataPtr()->columns, - custom_key_type = settings.parallel_replicas_custom_key_filter_type.value, - custom_key_range_lower = settings.parallel_replicas_custom_key_range_lower.value, - custom_key_range_upper = settings.parallel_replicas_custom_key_range_upper.value, - context = local_context, - replica_count = modified_query_info.getCluster()->getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr - { - return getCustomKeyFilterForParallelReplica( - replica_count, replica_num - 1, my_custom_key_ast, {custom_key_type, custom_key_range_lower, custom_key_range_upper}, column_description, context); - }; - } - } + auto shard_filter_generator = ClusterProxy::getShardFilterGeneratorForCustomKey( + *modified_query_info.getCluster(), local_context, getInMemoryMetadataPtr()->columns); ClusterProxy::executeQuery( query_plan, @@ -928,7 +911,7 @@ void StorageDistributed::read( sharding_key_expr, sharding_key_column_name, distributed_settings, - additional_shard_filter_generator, + shard_filter_generator, /* is_remote_function= */ static_cast(owned_cluster)); /// This is a bug, it is possible only when there is no shards to query, and this is handled earlier. diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index e475211deb3..381c20c616d 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -147,7 +148,7 @@ void StorageExecutable::read( for (auto & input_query : input_queries) { QueryPipelineBuilder builder; - if (context->getSettings().allow_experimental_analyzer) + if (context->getSettingsRef().allow_experimental_analyzer) builder = InterpreterSelectQueryAnalyzer(input_query, context, {}).buildQueryPipeline(); else builder = InterpreterSelectWithUnionQuery(input_query, context, {}).buildQueryPipeline(); @@ -225,7 +226,7 @@ void registerStorageExecutable(StorageFactory & factory) { size_t max_command_execution_time = 10; - size_t max_execution_time_seconds = static_cast(args.getContext()->getSettings().max_execution_time.totalSeconds()); + size_t max_execution_time_seconds = static_cast(args.getContext()->getSettingsRef().max_execution_time.totalSeconds()); if (max_execution_time_seconds != 0 && max_command_execution_time > max_execution_time_seconds) max_command_execution_time = max_execution_time_seconds; diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp index beb93afc972..951c87807bb 100644 --- a/src/Storages/StorageExternalDistributed.cpp +++ b/src/Storages/StorageExternalDistributed.cpp @@ -1,6 +1,6 @@ #include "StorageExternalDistributed.h" - +#include #include #include #include @@ -167,8 +167,9 @@ void registerStorageExternalDistributed(StorageFactory & factory) current_configuration, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); shards.insert(std::make_shared( args.table_id, std::move(pool), configuration.table, args.columns, args.constraints, String{}, context)); } diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp index 9d12a1569d8..060b271d8f4 100644 --- a/src/Storages/StorageFactory.cpp +++ b/src/Storages/StorageFactory.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7f39ff615f0..8301bd6676d 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -53,6 +53,8 @@ #include #include +#include + #include #include @@ -366,12 +368,21 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user } else if (path.find_first_of("*?{") == std::string::npos) { - std::error_code error; - size_t size = fs::file_size(path, error); - if (!error) - total_bytes_to_read += size; + if (!fs::is_directory(path)) + { + std::error_code error; + size_t size = fs::file_size(path, error); + if (!error) + total_bytes_to_read += size; - paths.push_back(path); + paths.push_back(path); + } + else + { + /// We list non-directory files under that directory. + paths = listFilesWithRegexpMatching(path / fs::path("*"), total_bytes_to_read); + can_be_directory = false; + } } else { diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index ac094aeb489..f955889185c 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -89,6 +89,7 @@ public: bool supportsSubsetOfColumns(const ContextPtr & context) const; bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } bool supportsDynamicSubcolumns() const override { return true; } diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h index f5a4362901e..9549f3a035c 100644 --- a/src/Storages/StorageFileCluster.h +++ b/src/Storages/StorageFileCluster.h @@ -27,15 +27,8 @@ public: const ConstraintsDescription & constraints_); std::string getName() const override { return "FileCluster"; } - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - - bool supportsDynamicSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - private: void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; diff --git a/src/Storages/StorageFromMergeTreeDataPart.cpp b/src/Storages/StorageFromMergeTreeDataPart.cpp new file mode 100644 index 00000000000..481d9e66901 --- /dev/null +++ b/src/Storages/StorageFromMergeTreeDataPart.cpp @@ -0,0 +1,59 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +bool StorageFromMergeTreeDataPart::materializeTTLRecalculateOnly() const +{ + if (parts.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "parts must not be empty for materializeTTLRecalculateOnly"); + return parts.front()->storage.getSettings()->materialize_ttl_recalculate_only; +} + +void StorageFromMergeTreeDataPart::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t num_streams) +{ + query_plan.addStep(MergeTreeDataSelectExecutor(storage).readFromParts( + parts, + alter_conversions, + column_names, + storage_snapshot, + query_info, + context, + max_block_size, + num_streams, + nullptr, + analysis_result_ptr)); +} + +StorageSnapshotPtr +StorageFromMergeTreeDataPart::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const +{ + const auto & storage_columns = metadata_snapshot->getColumns(); + if (!hasDynamicSubcolumns(storage_columns)) + return std::make_shared(*this, metadata_snapshot); + + auto data_parts = storage.getDataPartsVectorForInternalUsage(); + + auto object_columns = getConcreteObjectColumns( + data_parts.begin(), data_parts.end(), storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); }); + + return std::make_shared(*this, metadata_snapshot, std::move(object_columns)); +} + +} diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp new file mode 100644 index 00000000000..6e8f425f8dc --- /dev/null +++ b/src/Storages/StorageFuzzQuery.cpp @@ -0,0 +1,169 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +ColumnPtr FuzzQuerySource::createColumn() +{ + auto column = ColumnString::create(); + ColumnString::Chars & data_to = column->getChars(); + ColumnString::Offsets & offsets_to = column->getOffsets(); + + offsets_to.resize(block_size); + IColumn::Offset offset = 0; + + auto fuzz_base = query; + size_t row_num = 0; + + while (row_num < block_size) + { + ASTPtr new_query = fuzz_base->clone(); + + auto base_before_fuzz = fuzz_base->formatForErrorMessage(); + fuzzer.fuzzMain(new_query); + auto fuzzed_text = new_query->formatForErrorMessage(); + + if (base_before_fuzz == fuzzed_text) + continue; + + /// AST is too long, will start from the original query. + if (config.max_query_length > 500) + { + fuzz_base = query; + continue; + } + + IColumn::Offset next_offset = offset + fuzzed_text.size() + 1; + data_to.resize(next_offset); + + std::copy(fuzzed_text.begin(), fuzzed_text.end(), &data_to[offset]); + + data_to[offset + fuzzed_text.size()] = 0; + offsets_to[row_num] = next_offset; + + offset = next_offset; + fuzz_base = new_query; + ++row_num; + } + + return column; +} + +StorageFuzzQuery::StorageFuzzQuery( + const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_) + : IStorage(table_id_), config(config_) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment_); + setInMemoryMetadata(storage_metadata); +} + +Pipe StorageFuzzQuery::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & /*query_info*/, + ContextPtr /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t num_streams) +{ + storage_snapshot->check(column_names); + + Pipes pipes; + pipes.reserve(num_streams); + + const ColumnsDescription & our_columns = storage_snapshot->metadata->getColumns(); + Block block_header; + for (const auto & name : column_names) + { + const auto & name_type = our_columns.get(name); + MutableColumnPtr column = name_type.type->createColumn(); + block_header.insert({std::move(column), name_type.type, name_type.name}); + } + + const char * begin = config.query.data(); + const char * end = begin + config.query.size(); + + ParserQuery parser(end, false); + auto query = parseQuery(parser, begin, end, "", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); + + for (UInt64 i = 0; i < num_streams; ++i) + pipes.emplace_back(std::make_shared(max_block_size, block_header, config, query)); + + return Pipe::unitePipes(std::move(pipes)); +} + +StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine_args, ContextPtr local_context) +{ + StorageFuzzQuery::Configuration configuration{}; + + // Supported signatures: + // + // FuzzQuery(query) + // FuzzQuery(query, max_query_length) + // FuzzQuery(query, max_query_length, random_seed) + if (engine_args.empty() || engine_args.size() > 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 3 arguments: query, max_query_length, random_seed"); + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); + + auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); + configuration.query = std::move(first_arg); + + if (engine_args.size() >= 2) + { + const auto & literal = engine_args[1]->as(); + if (!literal.value.isNull()) + configuration.max_query_length = checkAndGetLiteralArgument(literal, "max_query_length"); + } + + if (engine_args.size() == 3) + { + const auto & literal = engine_args[2]->as(); + if (!literal.value.isNull()) + configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); + } + + return configuration; +} + +void registerStorageFuzzQuery(StorageFactory & factory) +{ + factory.registerStorage( + "FuzzQuery", + [](const StorageFactory::Arguments & args) -> std::shared_ptr + { + ASTs & engine_args = args.engine_args; + + if (engine_args.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage FuzzQuery must have arguments."); + + StorageFuzzQuery::Configuration configuration = StorageFuzzQuery::getConfiguration(engine_args, args.getLocalContext()); + + for (const auto& col : args.columns) + if (col.type->getTypeId() != TypeIndex::String) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "'StorageFuzzQuery' supports only columns of String type, got {}.", col.type->getName()); + + return std::make_shared(args.table_id, args.columns, args.comment, configuration); + }); +} + +} diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h new file mode 100644 index 00000000000..125ef960e74 --- /dev/null +++ b/src/Storages/StorageFuzzQuery.h @@ -0,0 +1,88 @@ +#pragma once + +#include +#include +#include +#include + +#include "config.h" + +namespace DB +{ + +class NamedCollection; + +class StorageFuzzQuery final : public IStorage +{ +public: + struct Configuration : public StatelessTableEngineConfiguration + { + String query; + UInt64 max_query_length = 500; + UInt64 random_seed = randomSeed(); + }; + + StorageFuzzQuery( + const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_); + + std::string getName() const override { return "FuzzQuery"; } + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + static StorageFuzzQuery::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); + +private: + const Configuration config; +}; + + +class FuzzQuerySource : public ISource +{ +public: + FuzzQuerySource( + UInt64 block_size_, Block block_header_, const StorageFuzzQuery::Configuration & config_, ASTPtr query_) + : ISource(block_header_) + , block_size(block_size_) + , block_header(std::move(block_header_)) + , config(config_) + , query(query_) + , fuzzer(config_.random_seed) + { + } + + String getName() const override { return "FuzzQuery"; } + +protected: + Chunk generate() override + { + Columns columns; + columns.reserve(block_header.columns()); + for (const auto & col : block_header) + { + chassert(col.type->getTypeId() == TypeIndex::String); + columns.emplace_back(createColumn()); + } + + return {std::move(columns), block_size}; + } + +private: + ColumnPtr createColumn(); + + UInt64 block_size; + Block block_header; + + StorageFuzzQuery::Configuration config; + ASTPtr query; + + QueryFuzzer fuzzer; +}; + +} diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 754bc096958..f3eb88f3207 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -30,6 +30,7 @@ #include #include +#include #include #include diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index a5bae0acce5..2226de3e64f 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include #include #include diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index f27a76dc0dd..a0d6cf11b64 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -395,11 +396,14 @@ void registerStorageJoin(StorageFactory & factory) else if (kind_str == "full") { if (strictness == JoinStrictness::Any) - strictness = JoinStrictness::RightAny; + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ANY FULL JOINs are not implemented"); kind = JoinKind::Full; } } + if ((strictness == JoinStrictness::Semi || strictness == JoinStrictness::Anti) && (kind != JoinKind::Left && kind != JoinKind::Right)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, " SEMI|ANTI JOIN should be LEFT or RIGHT"); + if (kind == JoinKind::Comma) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter of storage Join must be LEFT or INNER or RIGHT or FULL (without quotes)."); diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 20f99070000..ce760a8d63f 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index de0324d7998..ee0a37b50e3 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 316f398b476..57d95a98f11 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -161,6 +162,7 @@ StorageMaterializedView::StorageMaterializedView( manual_create_query->setDatabase(getStorageID().database_name); manual_create_query->setTable(generateInnerTableName(getStorageID())); manual_create_query->uuid = query.to_inner_uuid; + manual_create_query->has_uuid = query.to_inner_uuid != UUIDHelpers::Nil; auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index f69c4adb552..cd892488b86 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -1,5 +1,5 @@ -#include #include +#include #include #include @@ -37,7 +37,6 @@ #include #include - namespace DB { @@ -408,7 +407,7 @@ namespace auto data_file_path = temp_dir / fs::path{file_paths[data_bin_pos]}.filename(); auto data_out_compressed = temp_disk->writeFile(data_file_path); auto data_out = std::make_unique(*data_out_compressed, CompressionCodecFactory::instance().getDefaultCodec(), max_compress_block_size); - NativeWriter block_out{*data_out, 0, metadata_snapshot->getSampleBlock(), false, &index}; + NativeWriter block_out{*data_out, 0, metadata_snapshot->getSampleBlock(), std::nullopt, false, &index}; for (const auto & block : *blocks) block_out.write(block); data_out->finalize(); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 2dbe82c92d8..f5bc183931f 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9255ee00340..40b3a12297b 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1,51 +1,53 @@ -#include "StorageMergeTree.h" -#include "Core/QueryProcessingStage.h" -#include "Storages/MergeTree/IMergeTreeDataPart.h" +#include #include #include -#include -#include #include +#include +#include #include -#include "Common/Exception.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include #include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -220,24 +222,50 @@ void StorageMergeTree::read( { ClusterProxy::executeQueryWithParallelReplicas( query_plan, getStorageID(), processed_stage, query_info.query, local_context, query_info.storage_limits); + return; } - else - { - const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() - && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree - && (!local_context->getSettingsRef().allow_experimental_analyzer || query_info.analyzer_can_use_parallel_replicas_on_follower); - if (auto plan = reader.read( - column_names, + if (local_context->canUseParallelReplicasCustomKey() && settings.parallel_replicas_for_non_replicated_merge_tree + && !settings.allow_experimental_analyzer && local_context->getClientInfo().distributed_depth == 0) + { + if (auto cluster = local_context->getClusterForParallelReplicas(); + local_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) + { + auto modified_query_info = query_info; + modified_query_info.cluster = std::move(cluster); + ClusterProxy::executeQueryWithParallelReplicasCustomKey( + query_plan, + getStorageID(), + std::move(modified_query_info), + getInMemoryMetadataPtr()->getColumns(), storage_snapshot, - query_info, - local_context, - max_block_size, - num_streams, - nullptr, - enable_parallel_reading)) - query_plan = std::move(*plan); + processed_stage, + query_info.query, + local_context); + return; + } + else + LOG_WARNING( + log, + "Parallel replicas with custom key will not be used because cluster defined by 'cluster_for_parallel_replicas' ('{}') has " + "multiple shards", + cluster->getName()); } + + const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() + && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree + && (!local_context->getSettingsRef().allow_experimental_analyzer || query_info.current_table_chosen_for_reading_with_parallel_replicas); + + if (auto plan = reader.read( + column_names, + storage_snapshot, + query_info, + local_context, + max_block_size, + num_streams, + nullptr, + enable_parallel_reading)) + query_plan = std::move(*plan); } std::optional StorageMergeTree::totalRows(const Settings &) const @@ -630,12 +658,13 @@ void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr quer { /// It's important to serialize order of mutations with alter queries because /// they can depend on each other. - if (auto alter_lock = tryLockForAlter(query_context->getSettings().lock_acquire_timeout); alter_lock == std::nullopt) + if (auto alter_lock = tryLockForAlter(query_context->getSettingsRef().lock_acquire_timeout); alter_lock == std::nullopt) { - throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, - "Cannot start mutation in {}ms because some metadata-changing ALTER (MODIFY|RENAME|ADD|DROP) is currently executing. " - "You can change this timeout with `lock_acquire_timeout` setting", - query_context->getSettings().lock_acquire_timeout.totalMilliseconds()); + throw Exception( + ErrorCodes::TIMEOUT_EXCEEDED, + "Cannot start mutation in {}ms because some metadata-changing ALTER (MODIFY|RENAME|ADD|DROP) is currently executing. " + "You can change this timeout with `lock_acquire_timeout` setting", + query_context->getSettingsRef().lock_acquire_timeout.totalMilliseconds()); } version = startMutation(commands, query_context); } @@ -1457,6 +1486,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign cleared_count += clearOldPartsFromFilesystem(); cleared_count += clearOldMutations(); cleared_count += clearEmptyParts(); + cleared_count += unloadPrimaryKeysOfOutdatedParts(); return cleared_count; /// TODO maybe take into account number of cleared objects when calculating backoff }, common_assignee_trigger, getStorageID()), /* need_trigger */ false); @@ -1550,6 +1580,12 @@ bool StorageMergeTree::optimize( { assertNotReadonly(); + if (deduplicate && getInMemoryMetadataPtr()->hasProjections()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "OPTIMIZE DEDUPLICATE query is not supported for table {} as it has projections. " + "User should drop all the projections manually before running the query", + getStorageID().getTableName()); + if (deduplicate) { if (deduplicate_by_columns.empty()) diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index da391909dff..f2e19ad4c80 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index a8713c61e4d..b5a388e8159 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -613,8 +613,9 @@ void registerStoragePostgreSQL(StorageFactory & factory) auto pool = std::make_shared(configuration, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); return std::make_shared( args.table_id, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a127384c03c..72f725965e0 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5,22 +5,25 @@ #include #include +#include #include #include #include #include +#include #include #include #include #include +#include #include +#include #include #include -#include -#include -#include +#include #include +#include #include @@ -39,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -5272,6 +5276,8 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() if (shutdown_prepared_called.exchange(true)) return; + LOG_TRACE(log, "Start preparing for shutdown"); + try { auto settings_ptr = getSettings(); @@ -5282,7 +5288,11 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() stopBeingLeader(); if (attach_thread) + { attach_thread->shutdown(); + LOG_TRACE(log, "The attach thread is shutdown"); + } + restarting_thread.shutdown(/* part_of_full_shutdown */true); /// Explicitly set the event, because the restarting thread will not set it again @@ -5295,6 +5305,8 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() shutdown_deadline.emplace(std::chrono::system_clock::now()); throw; } + + LOG_TRACE(log, "Finished preparing for shutdown"); } void StorageReplicatedMergeTree::partialShutdown() @@ -5332,6 +5344,8 @@ void StorageReplicatedMergeTree::shutdown(bool) if (shutdown_called.exchange(true)) return; + LOG_TRACE(log, "Shutdown started"); + flushAndPrepareForShutdown(); if (!shutdown_deadline.has_value()) @@ -5374,6 +5388,7 @@ void StorageReplicatedMergeTree::shutdown(bool) /// Wait for all of them std::lock_guard lock(data_parts_exchange_ptr->rwlock); } + LOG_TRACE(log, "Shutdown finished"); } @@ -5460,13 +5475,45 @@ void StorageReplicatedMergeTree::read( /// 2. Do not read parts that have not yet been written to the quorum of the replicas. /// For this you have to synchronously go to ZooKeeper. if (settings.select_sequential_consistency) + { readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); + return; + } /// reading step for parallel replicas with new analyzer is built in Planner, so don't do it here - else if (local_context->canUseParallelReplicasOnInitiator() && !settings.allow_experimental_analyzer) + if (local_context->canUseParallelReplicasOnInitiator() && !settings.allow_experimental_analyzer) + { readParallelReplicasImpl(query_plan, column_names, query_info, local_context, processed_stage); - else - readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); -} + return; + } + + if (local_context->canUseParallelReplicasCustomKey() && !settings.allow_experimental_analyzer + && local_context->getClientInfo().distributed_depth == 0) + { + if (auto cluster = local_context->getClusterForParallelReplicas(); + local_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) + { + auto modified_query_info = query_info; + modified_query_info.cluster = std::move(cluster); + ClusterProxy::executeQueryWithParallelReplicasCustomKey( + query_plan, + getStorageID(), + std::move(modified_query_info), + getInMemoryMetadataPtr()->getColumns(), + storage_snapshot, + processed_stage, + query_info.query, + local_context); + return; + } + else + LOG_WARNING( + log, + "Parallel replicas with custom key will not be used because cluster defined by 'cluster_for_parallel_replicas' ('{}') has " + "multiple shards", + cluster->getName()); + } + + readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); } void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl( QueryPlan & query_plan, @@ -5508,7 +5555,8 @@ void StorageReplicatedMergeTree::readLocalImpl( const size_t num_streams) { const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() - && (!local_context->getSettingsRef().allow_experimental_analyzer || query_info.analyzer_can_use_parallel_replicas_on_follower); + && (!local_context->getSettingsRef().allow_experimental_analyzer + || query_info.current_table_chosen_for_reading_with_parallel_replicas); auto plan = reader.read( column_names, storage_snapshot, query_info, @@ -5746,6 +5794,12 @@ bool StorageReplicatedMergeTree::optimize( if (!is_leader) throw Exception(ErrorCodes::NOT_A_LEADER, "OPTIMIZE cannot be done on this replica because it is not a leader"); + if (deduplicate && getInMemoryMetadataPtr()->hasProjections()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "OPTIMIZE DEDUPLICATE query is not supported for table {} as it has projections. " + "User should drop all the projections manually before running the query", + getStorageID().getTableName()); + if (cleanup) { if (!getSettings()->allow_experimental_replacing_merge_with_cleanup) @@ -7309,7 +7363,7 @@ void StorageReplicatedMergeTree::fetchPartition( if (try_no) LOG_INFO(log, "Some of parts ({}) are missing. Will try to fetch covering parts.", missing_parts.size()); - if (try_no >= query_context->getSettings().max_fetch_partition_retries_count) + if (try_no >= query_context->getSettingsRef().max_fetch_partition_retries_count) throw Exception(ErrorCodes::TOO_MANY_RETRIES_TO_FETCH_PARTS, "Too many retries to fetch parts from {}:{}", from_zookeeper_name, best_replica_path); diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 5b7f9fc0ac2..22bd01e0071 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Storages/StorageSet.h b/src/Storages/StorageSet.h index 67a9528ff5e..5b6d899b48d 100644 --- a/src/Storages/StorageSet.h +++ b/src/Storages/StorageSet.h @@ -2,7 +2,6 @@ #include #include -#include namespace DB diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 8df87d6290f..68eaa75b416 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -7,6 +7,8 @@ #include #include +#include + #include #include #include @@ -193,7 +195,7 @@ public: storage.saveFileSizes(lock); size_t initial_data_size = storage.file_checker.getFileSize(storage.data_file_path); - block_out = std::make_unique(*data_out, 0, metadata_snapshot->getSampleBlock(), false, &storage.indices, initial_data_size); + block_out = std::make_unique(*data_out, 0, metadata_snapshot->getSampleBlock(), std::nullopt, false, &storage.indices, initial_data_size); } String getName() const override { return "StripeLogSink"; } @@ -289,7 +291,7 @@ StorageStripeLog::StorageStripeLog( , data_file_path(table_path + "data.bin") , index_file_path(table_path + "index.mrk") , file_checker(disk, table_path + "sizes.json") - , max_compress_block_size(context_->getSettings().max_compress_block_size) + , max_compress_block_size(context_->getSettingsRef().max_compress_block_size) , log(getLogger("StorageStripeLog")) { StorageInMemoryMetadata storage_metadata; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 895da028fc2..9cec8c75ebe 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 3090f8db12e..fa7cc6eeeef 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -296,6 +296,7 @@ public: } bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } bool supportsDynamicSubcolumns() const override { return true; } diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index 2e7c63d0097..69041482fd8 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index a6334e7430d..31bffa06210 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -30,15 +30,8 @@ public: const StorageURL::Configuration & configuration_); std::string getName() const override { return "URLCluster"; } - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - - bool supportsDynamicSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - private: void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 016de94c17c..5f768bce978 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -19,6 +19,8 @@ #include +#include + #include #include #include diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index fb8fa2d6da4..fa23a574ade 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -4,6 +4,8 @@ #include #include +#include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 8dd8d3b6154..6a7810b97f9 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp index f96b839a322..ae1a233ea81 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp +++ b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index f48a8c67971..7e4c1de1c65 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -328,7 +329,7 @@ void ReadFromSystemDetachedParts::applyFilters(ActionDAGNodes added_filter_nodes filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &block); if (filter) - VirtualColumnUtils::buildSetsForDAG(filter, context); + VirtualColumnUtils::buildSetsForDAG(*filter, context); } } diff --git a/src/Storages/System/StorageSystemErrors.cpp b/src/Storages/System/StorageSystemErrors.cpp index 2da268305f8..2955aebeb7e 100644 --- a/src/Storages/System/StorageSystemErrors.cpp +++ b/src/Storages/System/StorageSystemErrors.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/System/StorageSystemEvents.cpp b/src/Storages/System/StorageSystemEvents.cpp index 822d5c77788..f74c316e3fc 100644 --- a/src/Storages/System/StorageSystemEvents.cpp +++ b/src/Storages/System/StorageSystemEvents.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemMergeTreeSettings.cpp b/src/Storages/System/StorageSystemMergeTreeSettings.cpp index 7781e3789a4..18730010e53 100644 --- a/src/Storages/System/StorageSystemMergeTreeSettings.cpp +++ b/src/Storages/System/StorageSystemMergeTreeSettings.cpp @@ -1,7 +1,9 @@ +#include #include #include #include #include +#include #include #include diff --git a/src/Storages/System/StorageSystemMergeTreeSettings.h b/src/Storages/System/StorageSystemMergeTreeSettings.h index e2913a7e55b..4d4150dcec2 100644 --- a/src/Storages/System/StorageSystemMergeTreeSettings.h +++ b/src/Storages/System/StorageSystemMergeTreeSettings.h @@ -1,6 +1,5 @@ #pragma once -#include #include diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 175c0834bcb..f7d1c1b3eb8 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -274,7 +274,7 @@ void ReadFromSystemPartsBase::applyFilters(ActionDAGNodes added_filter_nodes) filter_by_database = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &block); if (filter_by_database) - VirtualColumnUtils::buildSetsForDAG(filter_by_database, context); + VirtualColumnUtils::buildSetsForDAG(*filter_by_database, context); block.insert(ColumnWithTypeAndName({}, std::make_shared(), table_column_name)); block.insert(ColumnWithTypeAndName({}, std::make_shared(), engine_column_name)); @@ -283,7 +283,7 @@ void ReadFromSystemPartsBase::applyFilters(ActionDAGNodes added_filter_nodes) filter_by_other_columns = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &block); if (filter_by_other_columns) - VirtualColumnUtils::buildSetsForDAG(filter_by_other_columns, context); + VirtualColumnUtils::buildSetsForDAG(*filter_by_other_columns, context); } } diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index be945162b39..8671fd850f8 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index a5c496db7e7..00b4824cc3d 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemS3Queue.cpp b/src/Storages/System/StorageSystemS3Queue.cpp index c1d686067fd..a1c9380f616 100644 --- a/src/Storages/System/StorageSystemS3Queue.cpp +++ b/src/Storages/System/StorageSystemS3Queue.cpp @@ -32,7 +32,6 @@ ColumnsDescription StorageSystemS3Queue::getColumnsDescription() {"status", std::make_shared(), "Status of processing: Processed, Processing, Failed"}, {"processing_start_time", std::make_shared(std::make_shared()), "Time at which processing of the file started"}, {"processing_end_time", std::make_shared(std::make_shared()), "Time at which processing of the file ended"}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected during processing of the file"}, {"exception", std::make_shared(), "Exception which happened during processing"}, }; } @@ -65,8 +64,6 @@ void StorageSystemS3Queue::fillData(MutableColumns & res_columns, ContextPtr, co else res_columns[i++]->insertDefault(); - ProfileEvents::dumpToMapColumn(file_status->profile_counters.getPartiallyAtomicSnapshot(), res_columns[i++].get(), true); - res_columns[i++]->insert(file_status->getException()); } } diff --git a/src/Storages/System/StorageSystemSettings.cpp b/src/Storages/System/StorageSystemSettings.cpp index b437108b00e..f1e9d542fec 100644 --- a/src/Storages/System/StorageSystemSettings.cpp +++ b/src/Storages/System/StorageSystemSettings.cpp @@ -1,4 +1,6 @@ #include + +#include #include #include #include diff --git a/src/Storages/System/StorageSystemSettingsProfileElements.cpp b/src/Storages/System/StorageSystemSettingsProfileElements.cpp index 2af3e6dfd05..40a669351b1 100644 --- a/src/Storages/System/StorageSystemSettingsProfileElements.cpp +++ b/src/Storages/System/StorageSystemSettingsProfileElements.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index ba7433fb9ae..cc221bbea69 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index 7bb1905ab57..e36c904ded7 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -18,8 +18,12 @@ #include #include #include -#include "base/types.h" + #include + +#include +#include + #include diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index cb46cd19517..ba9fac6d289 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index f831465277d..ac091e7cf3c 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 778c9e13adb..27c52124e9c 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -54,9 +54,9 @@ namespace DB namespace VirtualColumnUtils { -void buildSetsForDAG(const ActionsDAGPtr & dag, const ContextPtr & context) +void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context) { - for (const auto & node : dag->getNodes()) + for (const auto & node : dag.getNodes()) { if (node.type == ActionsDAG::ActionType::COLUMN) { @@ -79,7 +79,7 @@ void buildSetsForDAG(const ActionsDAGPtr & dag, const ContextPtr & context) void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context) { - buildSetsForDAG(dag, context); + buildSetsForDAG(*dag, context); auto actions = std::make_shared(dag); Block block_with_filter = block; actions->execute(block_with_filter, /*dry_run=*/ false, /*allow_duplicates_in_input=*/ true); diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index fbfbdd6c6cc..9045a2f5481 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -26,7 +26,7 @@ void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context); /// Builds sets used by ActionsDAG inplace. -void buildSetsForDAG(const ActionsDAGPtr & dag, const ContextPtr & context); +void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); /// Recursively checks if all functions used in DAG are deterministic in scope of query. bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 77e6ee9cb24..b842cdda022 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -48,6 +48,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index ed378169381..69e8f0a7880 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -290,7 +291,7 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, size_t min_block_size_rows = mutable_context->getSettingsRef().min_external_table_block_size_rows; size_t min_block_size_bytes = mutable_context->getSettingsRef().min_external_table_block_size_bytes; - auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes); + auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes); builder->resize(1); builder->addTransform(std::move(squashing)); diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 6ea7bdc312d..56071abaa95 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -1,4 +1,5 @@ #include "getStructureOfRemoteTable.h" +#include #include #include #include diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 628e5a85437..adc1074b1fe 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -26,6 +26,7 @@ void registerStorageGenerateRandom(StorageFactory & factory); void registerStorageExecutable(StorageFactory & factory); void registerStorageWindowView(StorageFactory & factory); void registerStorageLoop(StorageFactory & factory); +void registerStorageFuzzQuery(StorageFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerStorageFuzzJSON(StorageFactory & factory); #endif @@ -34,7 +35,6 @@ void registerStorageFuzzJSON(StorageFactory & factory); void registerStorageS3(StorageFactory & factory); void registerStorageHudi(StorageFactory & factory); void registerStorageS3Queue(StorageFactory & factory); -void registerStorageAzureQueue(StorageFactory & factory); #if USE_PARQUET void registerStorageDeltaLake(StorageFactory & factory); @@ -44,6 +44,10 @@ void registerStorageIceberg(StorageFactory & factory); #endif #endif +#if USE_AZURE_BLOB_STORAGE +void registerStorageAzureQueue(StorageFactory & factory); +#endif + #if USE_HDFS #if USE_HIVE void registerStorageHive(StorageFactory & factory); @@ -123,6 +127,7 @@ void registerStorages() registerStorageExecutable(factory); registerStorageWindowView(factory); registerStorageLoop(factory); + registerStorageFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerStorageFuzzJSON(factory); #endif diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index fc85bde11d9..f1b73e939b8 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index 137e1dc27fe..e5676c5c25d 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -36,7 +36,7 @@ StoragePtr ITableFunction::execute(const ASTPtr & ast_function, ContextPtr conte if (cached_columns.empty()) return executeImpl(ast_function, context, table_name, std::move(cached_columns), is_insert_query); - if (hasStaticStructure() && cached_columns == getActualTableStructure(context,is_insert_query)) + if (hasStaticStructure() && cached_columns == getActualTableStructure(context, is_insert_query)) return executeImpl(ast_function, context_to_use, table_name, std::move(cached_columns), is_insert_query); auto this_table_function = shared_from_this(); diff --git a/src/TableFunctions/ITableFunctionXDBC.cpp b/src/TableFunctions/ITableFunctionXDBC.cpp index a5c16b3a5aa..83d0ac4ef41 100644 --- a/src/TableFunctions/ITableFunctionXDBC.cpp +++ b/src/TableFunctions/ITableFunctionXDBC.cpp @@ -1,8 +1,10 @@ +#include +#include #include -#include -#include -#include #include +#include +#include +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index df2835dd630..552b9fde986 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionFactory.cpp b/src/TableFunctions/TableFunctionFactory.cpp index ce3daff0785..6ecdeb47779 100644 --- a/src/TableFunctions/TableFunctionFactory.cpp +++ b/src/TableFunctions/TableFunctionFactory.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 28bf72e07fb..1b6d86f8fa5 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -4,6 +4,7 @@ #include "registerTableFunctions.h" #include +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionFileCluster.cpp b/src/TableFunctions/TableFunctionFileCluster.cpp index 3e53349b022..514fa3143d0 100644 --- a/src/TableFunctions/TableFunctionFileCluster.cpp +++ b/src/TableFunctions/TableFunctionFileCluster.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp index ad2a142a140..5a0f47653d5 100644 --- a/src/TableFunctions/TableFunctionFormat.cpp +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include diff --git a/src/TableFunctions/TableFunctionFuzzQuery.cpp b/src/TableFunctions/TableFunctionFuzzQuery.cpp new file mode 100644 index 00000000000..224f6666556 --- /dev/null +++ b/src/TableFunctions/TableFunctionFuzzQuery.cpp @@ -0,0 +1,54 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +void TableFunctionFuzzQuery::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + ASTs & args_func = ast_function->children; + + if (args_func.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments", getName()); + + auto args = args_func.at(0)->children; + configuration = StorageFuzzQuery::getConfiguration(args, context); +} + +StoragePtr TableFunctionFuzzQuery::executeImpl( + const ASTPtr & /*ast_function*/, + ContextPtr context, + const std::string & table_name, + ColumnsDescription /*cached_columns*/, + bool is_insert_query) const +{ + ColumnsDescription columns = getActualTableStructure(context, is_insert_query); + auto res = std::make_shared( + StorageID(getDatabaseName(), table_name), + columns, + /* comment */ String{}, + configuration); + res->startup(); + return res; +} + +void registerTableFunctionFuzzQuery(TableFunctionFactory & factory) +{ + factory.registerFunction( + {.documentation + = {.description = "Perturbs a query string with random variations.", + .returned_value = "A table object with a single column containing perturbed query strings."}, + .allow_readonly = true}); +} + +} diff --git a/src/TableFunctions/TableFunctionFuzzQuery.h b/src/TableFunctions/TableFunctionFuzzQuery.h new file mode 100644 index 00000000000..22d10341c4d --- /dev/null +++ b/src/TableFunctions/TableFunctionFuzzQuery.h @@ -0,0 +1,42 @@ +#pragma once + +#include + +#include +#include +#include + +#include "config.h" + +namespace DB +{ + +class TableFunctionFuzzQuery : public ITableFunction +{ +public: + static constexpr auto name = "fuzzQuery"; + std::string getName() const override { return name; } + + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + + ColumnsDescription getActualTableStructure(ContextPtr /* context */, bool /* is_insert_query */) const override + { + return ColumnsDescription{{"query", std::make_shared()}}; + } + +private: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return "fuzzQuery"; } + + String source; + std::optional random_seed; + StorageFuzzQuery::Configuration configuration; +}; + +} diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index b027cf42880..ffd318f36f7 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -3,6 +3,8 @@ #if USE_MYSQL #include + +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionPostgreSQL.cpp b/src/TableFunctions/TableFunctionPostgreSQL.cpp index 8d94988cd65..d690da9949a 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.cpp +++ b/src/TableFunctions/TableFunctionPostgreSQL.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -80,8 +81,9 @@ void TableFunctionPostgreSQL::parseArguments(const ASTPtr & ast_function, Contex *configuration, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); } } diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index d7774bb6478..e60c31b2d77 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "registerTableFunctions.h" diff --git a/src/TableFunctions/TableFunctionView.cpp b/src/TableFunctions/TableFunctionView.cpp index 2a50fb2d006..57501df6d4d 100644 --- a/src/TableFunctions/TableFunctionView.cpp +++ b/src/TableFunctions/TableFunctionView.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionViewIfPermitted.cpp b/src/TableFunctions/TableFunctionViewIfPermitted.cpp index b1691803988..935be6c1987 100644 --- a/src/TableFunctions/TableFunctionViewIfPermitted.cpp +++ b/src/TableFunctions/TableFunctionViewIfPermitted.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index ca4913898f9..a6c90872f12 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -26,6 +26,7 @@ void registerTableFunctions() registerTableFunctionMongoDB(factory); registerTableFunctionRedis(factory); registerTableFunctionMergeTreeIndex(factory); + registerTableFunctionFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerTableFunctionFuzzJSON(factory); #endif diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index efde4d6dcdc..2a8864a9bfd 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -23,6 +23,7 @@ void registerTableFunctionGenerate(TableFunctionFactory & factory); void registerTableFunctionMongoDB(TableFunctionFactory & factory); void registerTableFunctionRedis(TableFunctionFactory & factory); void registerTableFunctionMergeTreeIndex(TableFunctionFactory & factory); +void registerTableFunctionFuzzQuery(TableFunctionFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerTableFunctionFuzzJSON(TableFunctionFactory & factory); #endif diff --git a/src/configure_config.cmake b/src/configure_config.cmake index a3f6dae4b87..75f61baa854 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -170,5 +170,8 @@ endif() if (TARGET ch_contrib::pocketfft) set(USE_POCKETFFT 1) endif() +if (TARGET ch_contrib::prometheus_protobufs) + set(USE_PROMETHEUS_PROTOBUFS 1) +endif() set(SOURCE_DIR ${PROJECT_SOURCE_DIR}) diff --git a/tests/ci/.mypy.ini b/tests/ci/.mypy.ini index 9bc44025826..f12d27979ce 100644 --- a/tests/ci/.mypy.ini +++ b/tests/ci/.mypy.ini @@ -15,3 +15,4 @@ warn_return_any = True no_implicit_reexport = True strict_equality = True extra_checks = True +ignore_missing_imports = True \ No newline at end of file diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py new file mode 100644 index 00000000000..1a062d05a23 --- /dev/null +++ b/tests/ci/artifactory.py @@ -0,0 +1,355 @@ +import argparse +import time +from pathlib import Path +from typing import Optional +from shutil import copy2 +from create_release import PackageDownloader, ReleaseInfo, ShellRunner +from ci_utils import WithIter + + +class MountPointApp(metaclass=WithIter): + RCLONE = "rclone" + S3FS = "s3fs" + + +class R2MountPoint: + _TEST_BUCKET_NAME = "repo-test" + _PROD_BUCKET_NAME = "packages" + _CACHE_MAX_SIZE_GB = 20 + MOUNT_POINT = "/home/ubuntu/mountpoint" + API_ENDPOINT = "https://d4fd593eebab2e3a58a599400c4cd64d.r2.cloudflarestorage.com" + LOG_FILE = "/home/ubuntu/fuse_mount.log" + # mod time is not required by reprepro and createrepo - disable to simplify bucket's mount sync (applicable fro rclone) + NOMODTIME = True + # enable debug messages in mount log + DEBUG = True + # enable cache for mountpoint + CACHE_ENABLED = False + # TODO: which mode is better: minimal/writes/full/off + _RCLONE_CACHE_MODE = "minimal" + UMASK = "0000" + + def __init__(self, app: str, dry_run: bool) -> None: + assert app in MountPointApp + self.app = app + if dry_run: + self.bucket_name = self._TEST_BUCKET_NAME + else: + self.bucket_name = self._PROD_BUCKET_NAME + + self.aux_mount_options = "" + self.async_mount = False + if self.app == MountPointApp.S3FS: + self.cache_dir = "/home/ubuntu/s3fs_cache" + # self.aux_mount_options += "-o nomodtime " if self.NOMODTIME else "" not for s3fs + self.aux_mount_options += "--debug " if self.DEBUG else "" + self.aux_mount_options += ( + f"-o use_cache={self.cache_dir} -o cache_size_mb={self._CACHE_MAX_SIZE_GB * 1024} " + if self.CACHE_ENABLED + else "" + ) + # without -o nomultipart there are errors like "Error 5 writing to /home/ubuntu/***.deb: Input/output error" + self.mount_cmd = f"s3fs {self.bucket_name} {self.MOUNT_POINT} -o url={self.API_ENDPOINT} -o use_path_request_style -o umask=0000 -o nomultipart -o logfile={self.LOG_FILE} {self.aux_mount_options}" + elif self.app == MountPointApp.RCLONE: + # run rclone mount process asynchronously, otherwise subprocess.run(daemonized command) will not return + self.async_mount = True + self.cache_dir = "/home/ubuntu/rclone_cache" + self.aux_mount_options += "--no-modtime " if self.NOMODTIME else "" + self.aux_mount_options += "-v " if self.DEBUG else "" # -vv too verbose + self.aux_mount_options += ( + f"--vfs-cache-mode {self._RCLONE_CACHE_MODE} --vfs-cache-max-size {self._CACHE_MAX_SIZE_GB}G" + if self.CACHE_ENABLED + else "--vfs-cache-mode off" + ) + # Use --no-modtime to try to avoid: ERROR : rpm/lts/clickhouse-client-24.3.6.5.x86_64.rpm: Failed to apply pending mod time + self.mount_cmd = f"rclone mount remote:{self.bucket_name} {self.MOUNT_POINT} --daemon --cache-dir {self.cache_dir} --umask 0000 --log-file {self.LOG_FILE} {self.aux_mount_options}" + else: + assert False + + def init(self): + print(f"Mount bucket [{self.bucket_name}] to [{self.MOUNT_POINT}]") + _CLEAN_LOG_FILE_CMD = f"tail -n 1000 {self.LOG_FILE} > {self.LOG_FILE}_tmp && mv {self.LOG_FILE}_tmp {self.LOG_FILE} ||:" + _MKDIR_CMD = f"mkdir -p {self.MOUNT_POINT}" + _MKDIR_FOR_CACHE = f"mkdir -p {self.cache_dir}" + _UNMOUNT_CMD = ( + f"mount | grep -q {self.MOUNT_POINT} && umount {self.MOUNT_POINT} ||:" + ) + + _TEST_MOUNT_CMD = f"mount | grep -q {self.MOUNT_POINT}" + ShellRunner.run(_CLEAN_LOG_FILE_CMD) + ShellRunner.run(_UNMOUNT_CMD) + ShellRunner.run(_MKDIR_CMD) + ShellRunner.run(_MKDIR_FOR_CACHE) + ShellRunner.run(self.mount_cmd, async_=self.async_mount) + if self.async_mount: + time.sleep(3) + ShellRunner.run(_TEST_MOUNT_CMD) + + @classmethod + def teardown(cls): + print(f"Unmount [{cls.MOUNT_POINT}]") + ShellRunner.run(f"umount {cls.MOUNT_POINT}") + + +class RepoCodenames(metaclass=WithIter): + LTS = "lts" + STABLE = "stable" + + +class DebianArtifactory: + _TEST_REPO_URL = "https://pub-73dd1910f4284a81a02a67018967e028.r2.dev/deb" + _PROD_REPO_URL = "https://packages.clickhouse.com/deb" + + def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.codename = release_info.codename + self.version = release_info.version + if dry_run: + self.repo_url = self._TEST_REPO_URL + else: + self.repo_url = self._PROD_REPO_URL + assert self.codename in RepoCodenames + self.pd = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + + def export_packages(self): + assert self.pd.local_deb_packages_ready(), "BUG: Packages are not downloaded" + print("Start adding packages") + paths = [ + self.pd.LOCAL_DIR + "/" + file for file in self.pd.get_deb_packages_files() + ] + REPREPRO_CMD_PREFIX = f"reprepro --basedir {R2MountPoint.MOUNT_POINT}/configs/deb --outdir {R2MountPoint.MOUNT_POINT}/deb --verbose" + cmd = f"{REPREPRO_CMD_PREFIX} includedeb {self.codename} {' '.join(paths)}" + print("Running export command:") + print(f" {cmd}") + ShellRunner.run(cmd) + ShellRunner.run("sync") + + if self.codename == RepoCodenames.LTS: + packages_with_version = [ + package + "=" + self.version for package in self.pd.get_packages_names() + ] + print( + f"Copy packages from {RepoCodenames.LTS} to {RepoCodenames.STABLE} repository" + ) + cmd = f"{REPREPRO_CMD_PREFIX} copy {RepoCodenames.STABLE} {RepoCodenames.LTS} {' '.join(packages_with_version)}" + print("Running copy command:") + print(f" {cmd}") + ShellRunner.run(cmd) + ShellRunner.run("sync") + + def test_packages(self): + ShellRunner.run("docker pull ubuntu:latest") + print(f"Test packages installation, version [{self.version}]") + cmd = f"docker run --rm ubuntu:latest bash -c \"apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; echo 'deb {self.repo_url} stable main' | tee /etc/apt/sources.list.d/clickhouse.list; apt update -y; apt-get install -y clickhouse-client={self.version}\"" + print("Running test command:") + print(f" {cmd}") + ShellRunner.run(cmd) + + +def _copy_if_not_exists(src: Path, dst: Path) -> Path: + if dst.is_dir(): + dst = dst / src.name + if not dst.exists(): + return copy2(src, dst) # type: ignore + if src.stat().st_size == dst.stat().st_size: + return dst + return copy2(src, dst) # type: ignore + + +class RpmArtifactory: + _TEST_REPO_URL = ( + "https://pub-73dd1910f4284a81a02a67018967e028.r2.dev/rpm/clickhouse.repo" + ) + _PROD_REPO_URL = "https://packages.clickhouse.com/rpm/clickhouse.repo" + _SIGN_KEY = "885E2BDCF96B0B45ABF058453E4AD4719DDE9A38" + + def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.codename = release_info.codename + self.version = release_info.version + if dry_run: + self.repo_url = self._TEST_REPO_URL + else: + self.repo_url = self._PROD_REPO_URL + assert self.codename in RepoCodenames + self.pd = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + + def export_packages(self, codename: Optional[str] = None) -> None: + assert self.pd.local_rpm_packages_ready(), "BUG: Packages are not downloaded" + codename = codename or self.codename + print(f"Start adding packages to [{codename}]") + paths = [ + self.pd.LOCAL_DIR + "/" + file for file in self.pd.get_rpm_packages_files() + ] + + dest_dir = Path(R2MountPoint.MOUNT_POINT) / "rpm" / codename + + for package in paths: + _copy_if_not_exists(Path(package), dest_dir) + + commands = ( + f"createrepo_c --local-sqlite --workers=2 --update --verbose {dest_dir}", + f"gpg --sign-with {self._SIGN_KEY} --detach-sign --batch --yes --armor {dest_dir / 'repodata' / 'repomd.xml'}", + ) + print(f"Exporting RPM packages into [{codename}]") + + for command in commands: + print("Running command:") + print(f" {command}") + ShellRunner.run(command) + + update_public_key = f"gpg --armor --export {self._SIGN_KEY}" + pub_key_path = dest_dir / "repodata" / "repomd.xml.key" + print("Updating repomd.xml.key") + pub_key_path.write_text(ShellRunner.run(update_public_key)[1]) + if codename == RepoCodenames.LTS: + self.export_packages(RepoCodenames.STABLE) + ShellRunner.run("sync") + + def test_packages(self): + ShellRunner.run("docker pull fedora:latest") + print(f"Test package installation, version [{self.version}]") + cmd = f'docker run --rm fedora:latest /bin/bash -c "dnf -y install dnf-plugins-core && dnf config-manager --add-repo={self.repo_url} && dnf makecache && dnf -y install clickhouse-client-{self.version}-1"' + print("Running test command:") + print(f" {cmd}") + ShellRunner.run(cmd) + + +class TgzArtifactory: + _TEST_REPO_URL = "https://pub-73dd1910f4284a81a02a67018967e028.r2.dev/tgz" + _PROD_REPO_URL = "https://packages.clickhouse.com/tgz" + + def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.codename = release_info.codename + self.version = release_info.version + if dry_run: + self.repo_url = self._TEST_REPO_URL + else: + self.repo_url = self._PROD_REPO_URL + assert self.codename in RepoCodenames + self.pd = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + + def export_packages(self, codename: Optional[str] = None) -> None: + assert self.pd.local_tgz_packages_ready(), "BUG: Packages are not downloaded" + codename = codename or self.codename + + paths = [ + self.pd.LOCAL_DIR + "/" + file for file in self.pd.get_tgz_packages_files() + ] + + dest_dir = Path(R2MountPoint.MOUNT_POINT) / "tgz" / codename + + print(f"Exporting TGZ packages into [{codename}]") + + for package in paths: + _copy_if_not_exists(Path(package), dest_dir) + + if codename == RepoCodenames.LTS: + self.export_packages(RepoCodenames.STABLE) + ShellRunner.run("sync") + + def test_packages(self): + tgz_file = "/tmp/tmp.tgz" + tgz_sha_file = "/tmp/tmp.tgz.sha512" + ShellRunner.run( + f"curl -o {tgz_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz" + ) + ShellRunner.run( + f"curl -o {tgz_sha_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz.sha512" + ) + expected_checksum = ShellRunner.run(f"cut -d ' ' -f 1 {tgz_sha_file}") + actual_checksum = ShellRunner.run(f"sha512sum {tgz_file} | cut -d ' ' -f 1") + assert ( + expected_checksum == actual_checksum + ), f"[{actual_checksum} != {expected_checksum}]" + ShellRunner.run("rm /tmp/tmp.tgz*") + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Adds release packages to the repository", + ) + parser.add_argument( + "--infile", + type=str, + required=True, + help="input file with release info", + ) + parser.add_argument( + "--export-debian", + action="store_true", + help="Export debian packages to repository", + ) + parser.add_argument( + "--export-rpm", + action="store_true", + help="Export rpm packages to repository", + ) + parser.add_argument( + "--export-tgz", + action="store_true", + help="Export tgz packages to repository", + ) + parser.add_argument( + "--test-debian", + action="store_true", + help="Test debian packages installation", + ) + parser.add_argument( + "--test-rpm", + action="store_true", + help="Test rpm packages installation", + ) + parser.add_argument( + "--test-tgz", + action="store_true", + help="Test tgz packages installation", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Dry run mode", + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + assert args.dry_run + + release_info = ReleaseInfo.from_file(args.infile) + """ + Use S3FS. RCLONE has some errors with r2 remote which I didn't figure out how to resolve: + ERROR : IO error: NotImplemented: versionId not implemented + Failed to copy: NotImplemented: versionId not implemented + """ + mp = R2MountPoint(MountPointApp.S3FS, dry_run=args.dry_run) + if args.export_debian: + mp.init() + DebianArtifactory(release_info, dry_run=args.dry_run).export_packages() + mp.teardown() + if args.export_rpm: + mp.init() + RpmArtifactory(release_info, dry_run=args.dry_run).export_packages() + mp.teardown() + if args.export_tgz: + mp.init() + TgzArtifactory(release_info, dry_run=args.dry_run).export_packages() + mp.teardown() + if args.test_debian: + DebianArtifactory(release_info, dry_run=args.dry_run).test_packages() + if args.test_tgz: + TgzArtifactory(release_info, dry_run=args.dry_run).test_packages() + if args.test_rpm: + RpmArtifactory(release_info, dry_run=args.dry_run).test_packages() diff --git a/tests/ci/ci.py b/tests/ci/ci.py index af2f4c0a1fc..8dcf3fc4c69 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -6,6 +6,7 @@ import os import re import subprocess import sys +import time from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, List, Optional @@ -15,7 +16,7 @@ import upload_result_helper from build_check import get_release_or_pr from ci_config import CI from ci_metadata import CiMetadata -from ci_utils import GHActions, normalize_string +from ci_utils import GHActions, normalize_string, Utils from clickhouse_helper import ( CiLogsCredentials, ClickHouseHelper, @@ -53,6 +54,7 @@ from stopwatch import Stopwatch from tee_popen import TeePopen from ci_cache import CiCache from ci_settings import CiSettings +from ci_buddy import CIBuddy from version_helper import get_version_from_repo # pylint: disable=too-many-lines @@ -262,6 +264,8 @@ def check_missing_images_on_dockerhub( def _pre_action(s3, indata, pr_info): + print("Clear dmesg") + Utils.clear_dmesg() CommitStatusData.cleanup() JobReport.cleanup() BuildResult.cleanup() @@ -322,8 +326,8 @@ def _mark_success_action( # do nothing, exit without failure print(f"ERROR: no status file for job [{job}]") - if job_config.run_always or job_config.run_by_label: - print(f"Job [{job}] runs always or by label in CI - do not cache") + if job_config.run_by_label or not job_config.has_digest(): + print(f"Job [{job}] has no digest or run by label in CI - do not cache") else: if pr_info.is_master: pass @@ -547,7 +551,17 @@ def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None: except Exception as e: raise e print("Going to update overall CI report") - set_status_comment(commit, pr_info) + for retry in range(2): + try: + set_status_comment(commit, pr_info) + break + except Exception as e: + print( + f"WARNING: Failed to update CI Running status, attempt [{retry + 1}], exception [{e}]" + ) + time.sleep(1) + else: + print("ERROR: All retry attempts failed.") print("... CI report update - done") @@ -992,7 +1006,11 @@ def main() -> int: ci_settings, args.skip_jobs, ) + ci_cache.print_status() + if IS_CI and pr_info.is_pr and not ci_settings.no_ci_cache: + ci_cache.filter_out_not_affected_jobs() + ci_cache.print_status() if IS_CI and not pr_info.is_merge_queue: # wait for pending jobs to be finished, await_jobs is a long blocking call @@ -1028,6 +1046,7 @@ def main() -> int: elif args.pre: assert indata, "Run config must be provided via --infile" _pre_action(s3, indata, pr_info) + JobReport.create_pre_report().dump() ### RUN action: start elif args.run: @@ -1079,6 +1098,16 @@ def main() -> int: print(status) print("::endgroup::") previous_status = status.state + print("Create dummy job report with job_skipped flag") + JobReport( + status=status.state, + description="", + test_results=[], + start_time="", + duration=0.0, + additional_files=[], + job_skipped=True, + ).dump() # ci cache check if not previous_status and not ci_settings.no_ci_cache: @@ -1114,12 +1143,22 @@ def main() -> int: exit_code = 1 else: exit_code = _run_test(check_name, args.run_command) + job_report = JobReport.load() if JobReport.exist() else None + assert ( + job_report + ), "BUG. There must be job report either real report, or pre-report if job was killed" + job_report.exit_code = exit_code + job_report.dump() ### RUN action: end ### POST action: start elif args.post: job_report = JobReport.load() if JobReport.exist() else None - if job_report: + assert ( + job_report + ), "BUG. There must be job report either real report, or pre-report if job was killed" + if not job_report.job_skipped and not job_report.pre_report: + # it's a real job report ch_helper = ClickHouseHelper() check_url = "" @@ -1219,9 +1258,32 @@ def main() -> int: indata["build"], ch_helper, ) - else: - # no job report - print(f"No job report for {[args.job_name]} - do nothing") + elif job_report.job_skipped: + print(f"Skipped after rerun check {[args.job_name]} - do nothing") + elif job_report.job_skipped: + print(f"Job was skipped {[args.job_name]} - do nothing") + elif job_report.pre_report: + print(f"ERROR: Job was killed - generate evidence") + job_report.update_duration() + # Job was killed! + if Utils.is_killed_with_oom(): + print("WARNING: OOM while job execution") + error = f"Out Of Memory, exit_code {job_report.exit_code}, after {job_report.duration}s" + else: + error = f"Unknown, exit_code {job_report.exit_code}, after {job_report.duration}s" + CIBuddy().post_error(error, job_name=_get_ext_check_name(args.job_name)) + if CI.is_test_job(args.job_name): + gh = GitHub(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) + post_commit_status( + commit, + ERROR, + "", + "Error: " + error, + _get_ext_check_name(args.job_name), + pr_info, + dump_to_file=True, + ) ### POST action: end ### MARK SUCCESS action: start diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py new file mode 100644 index 00000000000..ea690bb602c --- /dev/null +++ b/tests/ci/ci_buddy.py @@ -0,0 +1,88 @@ +import json +import os + +import boto3 +import requests +from botocore.exceptions import ClientError + +from pr_info import PRInfo +from ci_utils import Shell + + +class CIBuddy: + _HEADERS = {"Content-Type": "application/json"} + + def __init__(self, dry_run=False): + self.repo = os.getenv("GITHUB_REPOSITORY", "") + self.dry_run = dry_run + res = self._get_webhooks() + self.test_channel = "" + self.dev_ci_channel = "" + if res: + self.test_channel = json.loads(res)["test_channel"] + self.dev_ci_channel = json.loads(res)["ci_channel"] + self.job_name = os.getenv("CHECK_NAME", "unknown") + pr_info = PRInfo() + self.pr_number = pr_info.number + self.head_ref = pr_info.head_ref + self.commit_url = pr_info.commit_html_url + + @staticmethod + def _get_webhooks(): + name = "ci_buddy_web_hooks" + + session = boto3.Session(region_name="us-east-1") # Replace with your region + ssm_client = session.client("ssm") + json_string = None + try: + response = ssm_client.get_parameter( + Name=name, + WithDecryption=True, # Set to True if the parameter is a SecureString + ) + json_string = response["Parameter"]["Value"] + except ClientError as e: + print(f"An error occurred: {e}") + + return json_string + + def post(self, message, dry_run=None): + if dry_run is None: + dry_run = self.dry_run + print(f"Posting slack message, dry_run [{dry_run}]") + if dry_run: + url = self.test_channel + else: + url = self.dev_ci_channel + data = {"text": message} + try: + requests.post(url, headers=self._HEADERS, data=json.dumps(data), timeout=10) + except Exception as e: + print(f"ERROR: Failed to post message, ex {e}") + + def post_error(self, error_description, job_name="", with_instance_info=True): + instance_id, instance_type = "unknown", "unknown" + if with_instance_info: + instance_id = Shell.run("ec2metadata --instance-id") or instance_id + instance_type = Shell.run("ec2metadata --instance-type") or instance_type + if not job_name: + job_name = os.getenv("CHECK_NAME", "unknown") + line_err = f":red_circle: *Error: {error_description}*\n\n" + line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n" + line_job = f" *Job:* `{job_name}`\n" + line_pr_ = f" *PR:* \n" + line_br_ = f" *Branch:* `{self.head_ref}`, <{self.commit_url}|commit>\n" + message = line_err + message += line_job + if with_instance_info: + message += line_ghr + if self.pr_number > 0: + message += line_pr_ + else: + message += line_br_ + self.post(message) + + +if __name__ == "__main__": + # test + buddy = CIBuddy(dry_run=True) + buddy.post_error("Out of memory") diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 8ee0ae54385..417d5dbc262 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -520,6 +520,35 @@ class CiCache: self.RecordType.SUCCESSFUL, job, batch, num_batches, release_branch ) + def has_evidence(self, job: str, job_config: CI.JobConfig) -> bool: + """ + checks if the job has been seen in master/release CI + function is to be used to check if change did not affect the job + :param job_config: + :param job: + :return: + """ + return ( + self.is_successful( + job=job, + batch=0, + num_batches=job_config.num_batches, + release_branch=True, + ) + or self.is_pending( + job=job, + batch=0, + num_batches=job_config.num_batches, + release_branch=True, + ) + or self.is_failed( + job=job, + batch=0, + num_batches=job_config.num_batches, + release_branch=True, + ) + ) + def is_failed( self, job: str, batch: int, num_batches: int, release_branch: bool ) -> bool: @@ -609,7 +638,7 @@ class CiCache: pushes pending records for all jobs that supposed to be run """ for job, job_config in self.jobs_to_do.items(): - if job_config.run_always: + if not job_config.has_digest(): continue pending_state = PendingState(time.time(), run_url=GITHUB_RUN_URL) assert job_config.batches @@ -674,6 +703,52 @@ class CiCache: bucket=S3_BUILDS_BUCKET, file_path=result_json_path, s3_path=s3_path ) + def filter_out_not_affected_jobs(self): + """ + Filter is to be applied in PRs to remove jobs that are not affected by the change + :return: + """ + remove_from_to_do = [] + required_builds = [] + has_test_jobs_to_skip = False + for job_name, job_config in self.jobs_to_do.items(): + if CI.is_test_job(job_name) and job_name != CI.JobNames.BUILD_CHECK: + if job_config.reference_job_name: + reference_name = job_config.reference_job_name + reference_config = CI.JOB_CONFIGS[reference_name] + else: + reference_name = job_name + reference_config = job_config + if self.has_evidence( + job=reference_name, + job_config=reference_config, + ): + remove_from_to_do.append(job_name) + has_test_jobs_to_skip = True + else: + required_builds += ( + job_config.required_builds if job_config.required_builds else [] + ) + if has_test_jobs_to_skip: + # If there are tests to skip, it means build digest has not been changed. + # No need to test builds. Let's keep all builds required for test jobs and skip the others + for job_name, job_config in self.jobs_to_do.items(): + if CI.is_build_job(job_name): + if job_name not in required_builds: + remove_from_to_do.append(job_name) + + if not required_builds: + remove_from_to_do.append(CI.JobNames.BUILD_CHECK) + + for job in remove_from_to_do: + print(f"Filter job [{job}] - not affected by the change") + if job in self.jobs_to_do: + del self.jobs_to_do[job] + if job in self.jobs_to_wait: + del self.jobs_to_wait[job] + if job in self.jobs_to_skip: + self.jobs_to_skip.remove(job) + def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None: """ await pending jobs to be finished @@ -691,14 +766,19 @@ class CiCache: MAX_JOB_NUM_TO_WAIT = 3 round_cnt = 0 - # FIXME: temporary experiment: lets enable await for PR' workflows but for a shorter time + def _has_build_job(): + for job in self.jobs_to_wait: + if CI.is_build_job(job): + return True + return False + if not is_release: - MAX_ROUNDS_TO_WAIT = 3 + # in PRs we can wait only for builds, TIMEOUT*MAX_ROUNDS_TO_WAIT=100min is enough + MAX_ROUNDS_TO_WAIT = 2 while ( - len(self.jobs_to_wait) > MAX_JOB_NUM_TO_WAIT - and round_cnt < MAX_ROUNDS_TO_WAIT - ): + len(self.jobs_to_wait) > MAX_JOB_NUM_TO_WAIT or _has_build_job() + ) and round_cnt < MAX_ROUNDS_TO_WAIT: round_cnt += 1 GHActions.print_in_group( f"Wait pending jobs, round [{round_cnt}/{MAX_ROUNDS_TO_WAIT}]:", diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index bef43083a35..9b9ddee5326 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -13,6 +13,9 @@ class CI: each config item in the below dicts should be an instance of JobConfig class or inherited from it """ + MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 5 + MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 2 + # reimport types to CI class so that they visible as CI.* and mypy is happy # pylint:disable=useless-import-alias,reimported,import-outside-toplevel from ci_definitions import BuildConfig as BuildConfig @@ -311,42 +314,42 @@ class CI: random_bucket="parrepl_with_sanitizer", ), JobNames.STATELESS_TEST_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4 + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=2 ), JobNames.STATELESS_TEST_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_TSAN], num_batches=5 + required_builds=[BuildNames.PACKAGE_TSAN], num_batches=2 ), JobNames.STATELESS_TEST_MSAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_MSAN], num_batches=6 + required_builds=[BuildNames.PACKAGE_MSAN], num_batches=3 ), JobNames.STATELESS_TEST_UBSAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_UBSAN], num_batches=2 + required_builds=[BuildNames.PACKAGE_UBSAN], num_batches=1 ), JobNames.STATELESS_TEST_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=5 + required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=2 ), JobNames.STATELESS_TEST_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_RELEASE], ), JobNames.STATELESS_TEST_RELEASE_COVERAGE: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE_COVERAGE], num_batches=6 + required_builds=[BuildNames.PACKAGE_RELEASE_COVERAGE], num_batches=5 ), JobNames.STATELESS_TEST_AARCH64: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_AARCH64], runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 + required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=3 ), JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=6 + required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=2 ), JobNames.STATELESS_TEST_AZURE_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4, release_only=True + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=2, release_only=True ), JobNames.STATELESS_TEST_S3_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], - num_batches=5, + num_batches=3, ), JobNames.STRESS_TEST_DEBUG: CommonJobConfigs.STRESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], @@ -410,7 +413,9 @@ class CI: release_only=True, ), JobNames.INTEGRATION_TEST_FLAKY: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True + required_builds=[BuildNames.PACKAGE_ASAN], + pr_only=True, + reference_job_name=JobNames.INTEGRATION_TEST_TSAN, ), JobNames.COMPATIBILITY_TEST: CommonJobConfigs.COMPATIBILITY_TEST.with_properties( required_builds=[BuildNames.PACKAGE_RELEASE], @@ -452,7 +457,10 @@ class CI: required_builds=[BuildNames.PACKAGE_UBSAN], ), JobNames.STATELESS_TEST_FLAKY_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True, timeout=3600 + required_builds=[BuildNames.PACKAGE_ASAN], + pr_only=True, + timeout=3600, + reference_job_name=JobNames.STATELESS_TEST_RELEASE, ), JobNames.JEPSEN_KEEPER: JobConfig( required_builds=[BuildNames.BINARY_RELEASE], @@ -637,7 +645,7 @@ class CI: @classmethod def is_test_job(cls, job: str) -> bool: - return not cls.is_build_job(job) and job != cls.JobNames.STYLE_CHECK + return not cls.is_build_job(job) @classmethod def is_docs_job(cls, job: str) -> bool: diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 48e1280d939..acd9b7fa904 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -284,8 +284,12 @@ class JobConfig: # GH Runner type (tag from @Runners) runner_type: str - # used for config validation in ci unittests + # used in ci unittests for config validation job_name_keyword: str = "" + # name of another job that (if provided) should be used to check if job was affected by the change or not (in CiCache.has_evidence(job=@reference_job_name) call) + # for example: "Stateless flaky check" can use reference_job_name="Stateless tests (release)". "Stateless flaky check" does not run on master + # and there cannot be an evidence for it, so instead "Stateless tests (release)" job name can be used to check the evidence + reference_job_name: str = "" # builds required for the job (applicable for test jobs) required_builds: Optional[List[str]] = None # build config for the build job (applicable for builds) @@ -327,6 +331,9 @@ class JobConfig: assert self.required_builds return self.required_builds[0] + def has_digest(self) -> bool: + return self.digest != DigestConfig() + class CommonJobConfigs: """ @@ -378,7 +385,7 @@ class CommonJobConfigs: ), run_command='functional_test_check.py "$CHECK_NAME"', runner_type=Runners.FUNC_TESTER, - timeout=10800, + timeout=7200, ) STATEFUL_TEST = JobConfig( job_name_keyword="stateful", @@ -440,7 +447,12 @@ class CommonJobConfigs: ) ASTFUZZER_TEST = JobConfig( job_name_keyword="ast", - digest=DigestConfig(), + digest=DigestConfig( + include_paths=[ + "./tests/ci/ast_fuzzer_check.py", + ], + docker=["clickhouse/fuzzer"], + ), run_command="ast_fuzzer_check.py", run_always=True, runner_type=Runners.FUZZER_UNIT_TESTER, diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index e7034d0b104..44bd37fe260 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -1,7 +1,9 @@ import os +import re +import subprocess from contextlib import contextmanager from pathlib import Path -from typing import Any, Iterator, List, Union +from typing import Any, Iterator, List, Union, Optional class WithIter(type): @@ -42,3 +44,67 @@ class GHActions: for line in lines: print(line) print("::endgroup::") + + +class Shell: + @classmethod + def run_strict(cls, command): + subprocess.run( + command + " 2>&1", + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=True, + ) + + @classmethod + def run(cls, command): + res = "" + result = subprocess.run( + command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=False, + ) + if result.returncode == 0: + res = result.stdout + return res.strip() + + @classmethod + def check(cls, command): + result = subprocess.run( + command + " 2>&1", + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=False, + ) + return result.returncode == 0 + + +class Utils: + @staticmethod + def get_failed_tests_number(description: str) -> Optional[int]: + description = description.lower() + + pattern = r"fail:\s*(\d+)\s*(?=,|$)" + match = re.search(pattern, description) + if match: + return int(match.group(1)) + return None + + @staticmethod + def is_killed_with_oom(): + if Shell.check( + "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'" + ): + return True + return False + + @staticmethod + def clear_dmesg(): + Shell.run("sudo dmesg --clear ||:") diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py new file mode 100755 index 00000000000..7f4cf8c787a --- /dev/null +++ b/tests/ci/create_release.py @@ -0,0 +1,710 @@ +import argparse +import dataclasses +import json +import os +import subprocess + +from contextlib import contextmanager +from copy import copy +from pathlib import Path +from typing import Iterator, List + +from git_helper import Git, GIT_PREFIX +from ssh import SSHAgent +from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET +from s3_helper import S3Helper +from autoscale_runners_lambda.lambda_shared.pr import Labels +from version_helper import ( + FILE_WITH_VERSION_PATH, + GENERATED_CONTRIBUTORS, + get_abs_path, + get_version_from_repo, + update_cmake_version, + update_contributors, + VersionType, +) +from ci_config import CI + +CMAKE_PATH = get_abs_path(FILE_WITH_VERSION_PATH) +CONTRIBUTORS_PATH = get_abs_path(GENERATED_CONTRIBUTORS) + + +class ShellRunner: + + @classmethod + def run( + cls, command, check_retcode=True, print_output=True, async_=False, dry_run=False + ): + if dry_run: + print(f"Dry-run: Would run shell command: [{command}]") + return 0, "" + print(f"Running shell command: [{command}]") + if async_: + subprocess.Popen(command.split(" ")) # pylint:disable=consider-using-with + return 0, "" + result = subprocess.run( + command + " 2>&1", + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=True, + ) + if print_output: + print(result.stdout) + if check_retcode: + assert result.returncode == 0, f"Return code [{result.returncode}]" + return result.returncode, result.stdout + + +@dataclasses.dataclass +class ReleaseInfo: + version: str + release_tag: str + release_branch: str + commit_sha: str + # lts or stable + codename: str + + @staticmethod + def from_file(file_path: str) -> "ReleaseInfo": + with open(file_path, "r", encoding="utf-8") as json_file: + res = json.load(json_file) + return ReleaseInfo(**res) + + @staticmethod + def prepare(commit_ref: str, release_type: str, outfile: str) -> None: + Path(outfile).parent.mkdir(parents=True, exist_ok=True) + Path(outfile).unlink(missing_ok=True) + version = None + release_branch = None + release_tag = None + codename = None + assert release_type in ("patch", "new") + if release_type == "new": + # check commit_ref is right and on a right branch + ShellRunner.run( + f"git merge-base --is-ancestor origin/{commit_ref} origin/master" + ) + with checkout(commit_ref): + _, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}") + # Git() must be inside "with checkout" contextmanager + git = Git() + version = get_version_from_repo(git=git) + release_branch = "master" + expected_prev_tag = f"v{version.major}.{version.minor}.1.1-new" + version.bump().with_description(VersionType.NEW) + assert ( + git.latest_tag == expected_prev_tag + ), f"BUG: latest tag [{git.latest_tag}], expected [{expected_prev_tag}]" + release_tag = version.describe + codename = ( + VersionType.STABLE + ) # dummy value (artifactory won't be updated for new release) + if release_type == "patch": + with checkout(commit_ref): + _, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}") + # Git() must be inside "with checkout" contextmanager + git = Git() + version = get_version_from_repo(git=git) + codename = version.get_stable_release_type() + version.with_description(codename) + release_branch = f"{version.major}.{version.minor}" + release_tag = version.describe + ShellRunner.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags") + # check commit is right and on a right branch + ShellRunner.run( + f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}" + ) + if version.patch == 1: + expected_version = copy(version) + expected_version.bump() + expected_tag_prefix = ( + f"v{expected_version.major}.{expected_version.minor}-" + ) + expected_tag_suffix = "-new" + else: + expected_tag_prefix = ( + f"v{version.major}.{version.minor}.{version.patch-1}." + ) + expected_tag_suffix = f"-{version.get_stable_release_type()}" + if git.latest_tag.startswith( + expected_tag_prefix + ) and git.latest_tag.endswith(expected_tag_suffix): + pass + else: + assert ( + False + ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]" + + assert ( + release_branch + and commit_sha + and release_tag + and version + and codename in ("lts", "stable") + ) + res = ReleaseInfo( + release_branch=release_branch, + commit_sha=commit_sha, + release_tag=release_tag, + version=version.string, + codename=codename, + ) + with open(outfile, "w", encoding="utf-8") as f: + print(json.dumps(dataclasses.asdict(res), indent=2), file=f) + + def push_release_tag(self, dry_run: bool) -> None: + if dry_run: + # remove locally created tag from prev run + ShellRunner.run( + f"{GIT_PREFIX} tag -l | grep -q {self.release_tag} && git tag -d {self.release_tag} ||:" + ) + # Create release tag + print( + f"Create and push release tag [{self.release_tag}], commit [{self.commit_sha}]" + ) + tag_message = f"Release {self.release_tag}" + ShellRunner.run( + f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}" + ) + cmd_push_tag = f"{GIT_PREFIX} push origin {self.release_tag}:{self.release_tag}" + ShellRunner.run(cmd_push_tag, dry_run=dry_run) + + @staticmethod + def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None: + cmd = f"gh api repos/{GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}" + ShellRunner.run(cmd, dry_run=dry_run) + + def push_new_release_branch(self, dry_run: bool) -> None: + assert ( + self.release_branch == "master" + ), "New release branch can be created only for release type [new]" + git = Git() + version = get_version_from_repo(git=git) + new_release_branch = f"{version.major}.{version.minor}" + stable_release_type = version.get_stable_release_type() + version_after_release = copy(version) + version_after_release.bump() + assert ( + version_after_release.string == self.version + ), f"Unexpected current version in git, must precede [{self.version}] by one step, actual [{version.string}]" + if dry_run: + # remove locally created branch from prev run + ShellRunner.run( + f"{GIT_PREFIX} branch -l | grep -q {new_release_branch} && git branch -d {new_release_branch} ||:" + ) + print( + f"Create and push new release branch [{new_release_branch}], commit [{self.commit_sha}]" + ) + with checkout(self.release_branch): + with checkout_new(new_release_branch): + pr_labels = f"--label {Labels.RELEASE}" + if stable_release_type == VersionType.LTS: + pr_labels += f" --label {Labels.RELEASE_LTS}" + cmd_push_branch = ( + f"{GIT_PREFIX} push --set-upstream origin {new_release_branch}" + ) + ShellRunner.run(cmd_push_branch, dry_run=dry_run) + + print("Create and push backport tags for new release branch") + ReleaseInfo._create_gh_label( + f"v{new_release_branch}-must-backport", "10dbed", dry_run=dry_run + ) + ReleaseInfo._create_gh_label( + f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run + ) + ShellRunner.run( + f"""gh pr create --repo {GITHUB_REPOSITORY} --title 'Release pull request for branch {new_release_branch}' + --head {new_release_branch} {pr_labels} + --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.' + """, + dry_run=dry_run, + ) + + def update_version_and_contributors_list(self, dry_run: bool) -> None: + # Bump version, update contributors list, create PR + branch_upd_version_contributors = f"bump_version_{self.version}" + with checkout(self.commit_sha): + git = Git() + version = get_version_from_repo(git=git) + if self.release_branch == "master": + version.bump() + version.with_description(VersionType.TESTING) + else: + version.with_description(version.get_stable_release_type()) + assert ( + version.string == self.version + ), f"BUG: version in release info does not match version in git commit, expected [{self.version}], got [{version.string}]" + with checkout(self.release_branch): + with checkout_new(branch_upd_version_contributors): + update_cmake_version(version) + update_contributors(raise_error=True) + cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'" + cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}" + body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md") + actor = os.getenv("GITHUB_ACTOR", "") or "me" + cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file '{body_file} --label 'do not test' --assignee @{actor}" + ShellRunner.run(cmd_commit_version_upd, dry_run=dry_run) + ShellRunner.run(cmd_push_branch, dry_run=dry_run) + ShellRunner.run(cmd_create_pr, dry_run=dry_run) + if dry_run: + ShellRunner.run( + f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" + ) + ShellRunner.run( + f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" + ) + + def create_gh_release(self, packages_files: List[str], dry_run: bool) -> None: + repo = os.getenv("GITHUB_REPOSITORY") + assert repo + cmds = [] + cmds.append( + f"gh release create --repo {repo} --title 'Release {self.release_tag}' {self.release_tag}" + ) + for file in packages_files: + cmds.append(f"gh release upload {self.release_tag} {file}") + if not dry_run: + for cmd in cmds: + ShellRunner.run(cmd) + else: + print("Dry-run, would run commands:") + print("\n * ".join(cmds)) + + +class RepoTypes: + RPM = "rpm" + DEBIAN = "deb" + TGZ = "tgz" + + +class PackageDownloader: + PACKAGES = ( + "clickhouse-client", + "clickhouse-common-static", + "clickhouse-common-static-dbg", + "clickhouse-keeper", + "clickhouse-keeper-dbg", + "clickhouse-server", + ) + + EXTRA_PACKAGES = ( + "clickhouse-library-bridge", + "clickhouse-odbc-bridge", + ) + PACKAGE_TYPES = (CI.BuildNames.PACKAGE_RELEASE, CI.BuildNames.PACKAGE_AARCH64) + MACOS_PACKAGE_TO_BIN_SUFFIX = { + CI.BuildNames.BINARY_DARWIN: "macos", + CI.BuildNames.BINARY_DARWIN_AARCH64: "macos-aarch64", + } + LOCAL_DIR = "/tmp/packages" + + @classmethod + def _get_arch_suffix(cls, package_arch, repo_type): + if package_arch == CI.BuildNames.PACKAGE_RELEASE: + return ( + "amd64" if repo_type in (RepoTypes.DEBIAN, RepoTypes.TGZ) else "x86_64" + ) + elif package_arch == CI.BuildNames.PACKAGE_AARCH64: + return ( + "arm64" if repo_type in (RepoTypes.DEBIAN, RepoTypes.TGZ) else "aarch64" + ) + else: + assert False, "BUG" + + def __init__(self, release, commit_sha, version): + assert version.startswith(release), "Invalid release branch or version" + major, minor = map(int, release.split(".")) + self.package_names = list(self.PACKAGES) + if major > 24 or (major == 24 and minor > 3): + self.package_names += list(self.EXTRA_PACKAGES) + self.release = release + self.commit_sha = commit_sha + self.version = version + self.s3 = S3Helper() + self.deb_package_files = [] + self.rpm_package_files = [] + self.tgz_package_files = [] + # just binaries for macos + self.macos_package_files = ["clickhouse-macos", "clickhouse-macos-aarch64"] + self.file_to_type = {} + + ShellRunner.run(f"mkdir -p {self.LOCAL_DIR}") + + for package_type in self.PACKAGE_TYPES: + for package in self.package_names: + deb_package_file_name = f"{package}_{self.version}_{self._get_arch_suffix(package_type, RepoTypes.DEBIAN)}.deb" + self.deb_package_files.append(deb_package_file_name) + self.file_to_type[deb_package_file_name] = package_type + + rpm_package_file_name = f"{package}-{self.version}.{self._get_arch_suffix(package_type, RepoTypes.RPM)}.rpm" + self.rpm_package_files.append(rpm_package_file_name) + self.file_to_type[rpm_package_file_name] = package_type + + tgz_package_file_name = f"{package}-{self.version}-{self._get_arch_suffix(package_type, RepoTypes.TGZ)}.tgz" + self.tgz_package_files.append(tgz_package_file_name) + self.file_to_type[tgz_package_file_name] = package_type + tgz_package_file_name += ".sha512" + self.tgz_package_files.append(tgz_package_file_name) + self.file_to_type[tgz_package_file_name] = package_type + + def get_deb_packages_files(self): + return self.deb_package_files + + def get_rpm_packages_files(self): + return self.rpm_package_files + + def get_tgz_packages_files(self): + return self.tgz_package_files + + def get_macos_packages_files(self): + return self.macos_package_files + + def get_packages_names(self): + return self.package_names + + def get_all_packages_files(self): + assert self.local_tgz_packages_ready() + assert self.local_deb_packages_ready() + assert self.local_rpm_packages_ready() + assert self.local_macos_packages_ready() + res = [] + for package_file in ( + self.deb_package_files + + self.rpm_package_files + + self.tgz_package_files + + self.macos_package_files + ): + res.append(self.LOCAL_DIR + "/" + package_file) + return res + + def run(self): + ShellRunner.run(f"rm -rf {self.LOCAL_DIR}/*") + for package_file in ( + self.deb_package_files + self.rpm_package_files + self.tgz_package_files + ): + print(f"Downloading: [{package_file}]") + s3_path = "/".join( + [ + self.release, + self.commit_sha, + self.file_to_type[package_file], + package_file, + ] + ) + self.s3.download_file( + bucket=S3_BUILDS_BUCKET, + s3_path=s3_path, + local_file_path="/".join([self.LOCAL_DIR, package_file]), + ) + + for macos_package, bin_suffix in self.MACOS_PACKAGE_TO_BIN_SUFFIX.items(): + binary_name = "clickhouse" + destination_binary_name = f"{binary_name}-{bin_suffix}" + assert destination_binary_name in self.macos_package_files + print( + f"Downloading: [{macos_package}] binary to [{destination_binary_name}]" + ) + s3_path = "/".join( + [ + self.release, + self.commit_sha, + macos_package, + binary_name, + ] + ) + self.s3.download_file( + bucket=S3_BUILDS_BUCKET, + s3_path=s3_path, + local_file_path="/".join([self.LOCAL_DIR, destination_binary_name]), + ) + + def local_deb_packages_ready(self) -> bool: + assert self.deb_package_files + for package_file in self.deb_package_files: + print(f"Check package is downloaded [{package_file}]") + if not Path(self.LOCAL_DIR + "/" + package_file).is_file(): + return False + return True + + def local_rpm_packages_ready(self) -> bool: + assert self.rpm_package_files + for package_file in self.rpm_package_files: + print(f"Check package is downloaded [{package_file}]") + if not Path(self.LOCAL_DIR + "/" + package_file).is_file(): + return False + return True + + def local_tgz_packages_ready(self) -> bool: + assert self.tgz_package_files + for package_file in self.tgz_package_files: + print(f"Check package is downloaded [{package_file}]") + if not Path(self.LOCAL_DIR + "/" + package_file).is_file(): + return False + return True + + def local_macos_packages_ready(self) -> bool: + assert self.macos_package_files + for package_file in self.macos_package_files: + print(f"Check package is downloaded [{package_file}]") + if not Path(self.LOCAL_DIR + "/" + package_file).is_file(): + return False + return True + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Creates release", + ) + parser.add_argument( + "--prepare-release-info", + action="store_true", + help="Initial step to prepare info like release branch, release tag, etc.", + ) + parser.add_argument( + "--push-release-tag", + action="store_true", + help="Creates and pushes git tag", + ) + parser.add_argument( + "--push-new-release-branch", + action="store_true", + help="Creates and pushes new release branch and corresponding service gh tags for backports", + ) + parser.add_argument( + "--create-bump-version-pr", + action="store_true", + help="Updates version, contributors' list and creates PR", + ) + parser.add_argument( + "--download-packages", + action="store_true", + help="Downloads all required packages from s3", + ) + parser.add_argument( + "--create-gh-release", + action="store_true", + help="Create GH Release object and attach all packages", + ) + parser.add_argument( + "--ref", + type=str, + help="the commit hash or branch", + ) + parser.add_argument( + "--release-type", + choices=("new", "patch"), + # dest="release_type", + help="a release type to bump the major.minor.patch version part, " + "new branch is created only for the value 'new'", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="do not make any actual changes in the repo, just show what will be done", + ) + parser.add_argument( + "--outfile", + default="", + type=str, + help="output file to write json result to, if not set - stdout", + ) + parser.add_argument( + "--infile", + default="", + type=str, + help="input file with release info", + ) + + return parser.parse_args() + + +@contextmanager +def checkout(ref: str) -> Iterator[None]: + _, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") + rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" + assert orig_ref + if ref not in (orig_ref,): + ShellRunner.run(f"{GIT_PREFIX} checkout {ref}") + try: + yield + except (Exception, KeyboardInterrupt) as e: + print(f"ERROR: Exception [{e}]") + ShellRunner.run(rollback_cmd) + raise + ShellRunner.run(rollback_cmd) + + +@contextmanager +def checkout_new(ref: str) -> Iterator[None]: + _, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") + rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" + assert orig_ref + ShellRunner.run(f"{GIT_PREFIX} checkout -b {ref}") + try: + yield + except (Exception, KeyboardInterrupt) as e: + print(f"ERROR: Exception [{e}]") + ShellRunner.run(rollback_cmd) + raise + ShellRunner.run(rollback_cmd) + + +if __name__ == "__main__": + args = parse_args() + assert args.dry_run + + # prepare ssh for git if needed + _ssh_agent = None + _key_pub = None + if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""): + _key = os.getenv("ROBOT_CLICKHOUSE_SSH_KEY") + _ssh_agent = SSHAgent() + _key_pub = _ssh_agent.add(_key) + _ssh_agent.print_keys() + + if args.prepare_release_info: + assert ( + args.ref and args.release_type and args.outfile + ), "--ref, --release-type and --outfile must be provided with --prepare-release-info" + ReleaseInfo.prepare( + commit_ref=args.ref, release_type=args.release_type, outfile=args.outfile + ) + if args.push_release_tag: + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + release_info.push_release_tag(dry_run=args.dry_run) + if args.push_new_release_branch: + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + release_info.push_new_release_branch(dry_run=args.dry_run) + if args.create_bump_version_pr: + # TODO: store link to PR in release info + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + release_info.update_version_and_contributors_list(dry_run=args.dry_run) + if args.download_packages: + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + p = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + p.run() + if args.create_gh_release: + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + p = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + release_info.create_gh_release(p.get_all_packages_files(), args.dry_run) + + # tear down ssh + if _ssh_agent and _key_pub: + _ssh_agent.remove(_key_pub) + + +""" +Prepare release machine: + +### INSTALL PACKAGES +sudo apt update +sudo apt install --yes --no-install-recommends python3-dev python3-pip gh unzip +sudo apt install --yes python3-boto3 +sudo apt install --yes python3-github +sudo apt install --yes python3-unidiff +sudo apt install --yes s3fs + +### INSTALL AWS CLI +cd /tmp +curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip" +unzip awscliv2.zip +sudo ./aws/install +rm -rf aws* +cd - + +### INSTALL GH ACTIONS RUNNER: +# Create a folder +RUNNER_VERSION=2.317.0 +cd ~ +mkdir actions-runner && cd actions-runner +# Download the latest runner package +runner_arch() { + case $(uname -m) in + x86_64 ) + echo x64;; + aarch64 ) + echo arm64;; + esac +} +curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-$(runner_arch)-$RUNNER_VERSION.tar.gz +# Extract the installer +tar xzf ./actions-runner-linux-$(runner_arch)-$RUNNER_VERSION.tar.gz +rm ./actions-runner-linux-$(runner_arch)-$RUNNER_VERSION.tar.gz + +### Install reprepro: +cd ~ +sudo apt install dpkg-dev libgpgme-dev libdb-dev libbz2-dev liblzma-dev libarchive-dev shunit2 db-util debhelper +git clone https://salsa.debian.org/debian/reprepro.git +cd reprepro +dpkg-buildpackage -b --no-sign && sudo dpkg -i ../reprepro_$(dpkg-parsechangelog --show-field Version)_$(dpkg-architecture -q DEB_HOST_ARCH).deb + +### Install createrepo-c: +sudo apt install createrepo-c +createrepo_c --version +#Version: 0.17.3 (Features: DeltaRPM LegacyWeakdeps ) + +### Import gpg sign key +gpg --import key.pgp +gpg --list-secret-keys + +### Install docker +sudo su; cd ~ + +deb_arch() { + case $(uname -m) in + x86_64 ) + echo amd64;; + aarch64 ) + echo arm64;; + esac +} +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + +echo "deb [arch=$(deb_arch) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + +sudo apt-get update +sudo apt-get install --yes --no-install-recommends docker-ce docker-buildx-plugin docker-ce-cli containerd.io + +sudo usermod -aG docker ubuntu + +# enable ipv6 in containers (fixed-cidr-v6 is some random network mask) +cat < /etc/docker/daemon.json +{ + "ipv6": true, + "fixed-cidr-v6": "2001:db8:1::/64", + "log-driver": "json-file", + "log-opts": { + "max-file": "5", + "max-size": "1000m" + }, + "insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"], + "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] +} +EOT + +# if docker build does not work: + sudo systemctl restart docker + docker buildx rm mybuilder + docker buildx create --name mybuilder --driver docker-container --use + docker buildx inspect mybuilder --bootstrap + +### Install tailscale + +### Configure GH runner +""" diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 151cc5a4c02..21fc02ce02a 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -11,7 +11,6 @@ from os import path as p from pathlib import Path from typing import Dict, List -from build_check import get_release_or_pr from build_download_helper import read_build_urls from docker_images_helper import DockerImageData, docker_login from env_helper import ( @@ -22,7 +21,7 @@ from env_helper import ( TEMP_PATH, ) from git_helper import Git -from pr_info import PRInfo +from pr_info import PRInfo, EventType from report import FAILURE, SUCCESS, JobReport, TestResult, TestResults from stopwatch import Stopwatch from tee_popen import TeePopen @@ -63,6 +62,12 @@ def parse_args() -> argparse.Namespace: help="a version to build, automaticaly got from version_helper, accepts either " "tag ('refs/tags/' is removed automatically) or a normal 22.2.2.2 format", ) + parser.add_argument( + "--sha", + type=str, + default="", + help="sha of the commit to use packages from", + ) parser.add_argument( "--release-type", type=str, @@ -122,7 +127,7 @@ def parse_args() -> argparse.Namespace: def retry_popen(cmd: str, log_file: Path) -> int: - max_retries = 5 + max_retries = 2 for retry in range(max_retries): # From time to time docker build may failed. Curl issues, or even push # It will sleep progressively 5, 15, 30 and 50 seconds between retries @@ -370,13 +375,22 @@ def main(): tags = gen_tags(args.version, args.release_type) repo_urls = {} direct_urls: Dict[str, List[str]] = {} - release_or_pr, _ = get_release_or_pr(pr_info, args.version) + if pr_info.event_type == EventType.PULL_REQUEST: + release_or_pr = str(pr_info.number) + sha = pr_info.sha + elif pr_info.event_type == EventType.PUSH and pr_info.is_master: + release_or_pr = str(0) + sha = pr_info.sha + else: + release_or_pr = f"{args.version.major}.{args.version.minor}" + sha = args.sha + assert sha for arch, build_name in zip(ARCH, ("package_release", "package_aarch64")): if not args.bucket_prefix: repo_urls[arch] = ( f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/" - f"{release_or_pr}/{pr_info.sha}/{build_name}" + f"{release_or_pr}/{sha}/{build_name}" ) else: repo_urls[arch] = f"{args.bucket_prefix}/{build_name}" diff --git a/tests/ci/download_release_packages.py b/tests/ci/download_release_packages.py index 8f3a2190ae8..e8260cf68f1 100755 --- a/tests/ci/download_release_packages.py +++ b/tests/ci/download_release_packages.py @@ -13,26 +13,28 @@ from get_previous_release_tag import ( PACKAGES_DIR = Path("previous_release_package_folder") -def download_packages(release: ReleaseInfo, dest_path: Path = PACKAGES_DIR) -> None: +def download_packages( + release: ReleaseInfo, dest_path: Path = PACKAGES_DIR, debug: bool = False +) -> None: dest_path.mkdir(parents=True, exist_ok=True) logging.info("Will download %s", release) for pkg, url in release.assets.items(): - if not pkg.endswith("_amd64.deb") or "-dbg_" in pkg: + if not pkg.endswith("_amd64.deb") or (not debug and "-dbg_" in pkg): continue pkg_name = dest_path / pkg download_build_with_progress(url, pkg_name) -def download_last_release(dest_path: Path) -> None: +def download_last_release(dest_path: Path, debug: bool = False) -> None: current_release = get_previous_release(None) if current_release is None: raise DownloadException("The current release is not found") - download_packages(current_release, dest_path=dest_path) + download_packages(current_release, dest_path=dest_path, debug=debug) if __name__ == "__main__": logging.basicConfig(level=logging.INFO) release = get_release_by_tag(input()) - download_packages(release) + download_packages(release, debug=True) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index ee459ce35a0..ef9f4dc016e 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -104,8 +104,11 @@ def get_run_command( return ( f"docker run --volume={builds_path}:/package_folder " + # For dmesg and sysctl + "--privileged " f"{ci_logs_args}" f"--volume={repo_path}/tests:/usr/share/clickhouse-test " + f"--volume={repo_path}/utils/grpc-client:/usr/share/clickhouse-utils/grpc-client " f"{volume_with_broken_test}" f"--volume={result_path}:/test_output " f"--volume={server_log_path}:/var/log/clickhouse-server " @@ -253,7 +256,7 @@ def main(): packages_path.mkdir(parents=True, exist_ok=True) if validate_bugfix_check: - download_last_release(packages_path) + download_last_release(packages_path, debug=True) else: download_all_deb_packages(check_name, reports_path, packages_path) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 7c74f52b610..80ac1935d95 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -185,7 +185,7 @@ def main(): build_path.mkdir(parents=True, exist_ok=True) if validate_bugfix_check: - download_last_release(build_path) + download_last_release(build_path, debug=True) else: download_all_deb_packages(check_name, reports_path, build_path) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 87f721cfde7..21f16d995a4 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -434,7 +434,14 @@ class ClickhouseIntegrationTestsRunner: "Getting all tests to the file %s with cmd: \n%s", out_file_full, cmd ) with open(out_file_full, "wb") as ofd: - subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) + try: + subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) + except subprocess.CalledProcessError as ex: + print("ERROR: Setting test plan failed. Output:") + with open(out_file_full, "r", encoding="utf-8") as file: + for line in file: + print(" " + line, end="") + raise ex all_tests = set() with open(out_file_full, "r", encoding="utf-8") as all_tests_fd: diff --git a/tests/ci/lambda_shared_package/lambda_shared/token.py b/tests/ci/lambda_shared_package/lambda_shared/token.py index 9749122bd39..3fb8f10c0e2 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/token.py +++ b/tests/ci/lambda_shared_package/lambda_shared/token.py @@ -1,4 +1,5 @@ """Module to get the token for GitHub""" + from dataclasses import dataclass import json import time diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 37c08fc4efe..59749abb4fa 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -26,6 +26,8 @@ from pr_info import PRInfo from report import SUCCESS, FAILURE from env_helper import GITHUB_UPSTREAM_REPOSITORY, GITHUB_REPOSITORY from synchronizer_utils import SYNC_BRANCH_PREFIX +from ci_config import CI +from ci_utils import Utils # The team name for accepted approvals TEAM_NAME = getenv("GITHUB_TEAM_NAME", "core") @@ -251,23 +253,77 @@ def main(): # set mergeable check status and exit commit = get_commit(gh, args.pr_info.sha) statuses = get_commit_filtered_statuses(commit) - state = trigger_mergeable_check( - commit, - statuses, - workflow_failed=(args.wf_status != "success"), - ) - # Process upstream StatusNames.SYNC - pr_info = PRInfo() - if ( - pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") - and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY - ): - print("Updating upstream statuses") - update_upstream_sync_status(pr_info, state) + max_failed_tests_per_job = 0 + job_name_with_max_failures = None + total_failed_tests = 0 + failed_to_get_info = False + has_failed_statuses = False + for status in statuses: + if not CI.is_required(status.context): + continue + if status.state == FAILURE: + has_failed_statuses = True + failed_cnt = Utils.get_failed_tests_number(status.description) + if failed_cnt is None: + failed_to_get_info = True + else: + if failed_cnt > max_failed_tests_per_job: + job_name_with_max_failures = status.context + max_failed_tests_per_job = failed_cnt + total_failed_tests += failed_cnt + elif status.state != SUCCESS and status.context not in ( + CI.StatusNames.SYNC, + CI.StatusNames.PR_CHECK, + ): + # do not block CI on failures in (CI.StatusNames.SYNC, CI.StatusNames.PR_CHECK) + has_failed_statuses = True + print( + f"Unexpected status for [{status.context}]: [{status.state}] - block further testing" + ) + failed_to_get_info = True - if args.wf_status != "success": - # exit with 1 to rerun on workflow failed job restart + can_continue = True + if total_failed_tests > CI.MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI: + print( + f"Required check has [{total_failed_tests}] failed - block further testing" + ) + can_continue = False + if max_failed_tests_per_job > CI.MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI: + print( + f"Job [{job_name_with_max_failures}] has [{max_failed_tests_per_job}] failures - block further testing" + ) + can_continue = False + if failed_to_get_info: + print("Unexpected commit status state - block further testing") + can_continue = False + if args.wf_status != SUCCESS and not has_failed_statuses: + # workflow failed but reason is unknown as no failed statuses present + can_continue = False + print( + "WARNING: Either the runner is faulty or the operating status is unknown. The first is self-healing, the second requires investigation." + ) + + if args.wf_status == SUCCESS or has_failed_statuses: + # do not set mergeable check status if args.wf_status == failure, apparently it has died runners and is to be restarted + state = trigger_mergeable_check( + commit, + statuses, + ) + # Process upstream StatusNames.SYNC + pr_info = PRInfo() + if ( + pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") + and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY + ): + print("Updating upstream statuses") + update_upstream_sync_status(pr_info, state) + else: + print( + "Workflow failed but no failed statuses found (died runner?) - cannot set Mergeable Check status" + ) + + if not can_continue: sys.exit(1) sys.exit(0) diff --git a/tests/ci/report.py b/tests/ci/report.py index bdaa2e15130..4be7b438f4f 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -23,7 +23,7 @@ from typing import ( from build_download_helper import get_gh_api from ci_config import CI from ci_utils import normalize_string -from env_helper import REPORT_PATH, TEMP_PATH +from env_helper import REPORT_PATH, GITHUB_WORKSPACE logger = logging.getLogger(__name__) @@ -244,7 +244,8 @@ HTML_TEST_PART = """ """ BASE_HEADERS = ["Test name", "Test status"] -JOB_REPORT_FILE = Path(TEMP_PATH) / "job_report.json" +# should not be in TEMP directory or any directory that may be cleaned during the job execution +JOB_REPORT_FILE = Path(GITHUB_WORKSPACE) / "job_report.json" @dataclass @@ -296,6 +297,33 @@ class JobReport: build_dir_for_upload: Union[Path, str] = "" # if False no GH commit status will be created by CI need_commit_status: bool = True + # indicates that this is not real job report but report for the job that was skipped by rerun check + job_skipped: bool = False + # indicates that report generated by CI script in order to check later if job was killed before real report is generated + pre_report: bool = False + exit_code: int = -1 + + @staticmethod + def create_pre_report() -> "JobReport": + return JobReport( + status=ERROR, + description="", + test_results=[], + start_time=datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"), + duration=0.0, + additional_files=[], + pre_report=True, + ) + + def update_duration(self): + if not self.start_time: + self.duration = 0.0 + else: + start_time = datetime.datetime.strptime( + self.start_time, "%Y-%m-%d %H:%M:%S" + ) + current_time = datetime.datetime.utcnow() + self.duration = (current_time - start_time).total_seconds() def __post_init__(self): assert self.status in (SUCCESS, ERROR, FAILURE, PENDING) diff --git a/tests/ci/ssh.py b/tests/ci/ssh.py index 321826fcf44..89d90d724d2 100644 --- a/tests/ci/ssh.py +++ b/tests/ci/ssh.py @@ -37,9 +37,9 @@ class SSHAgent: ssh_options = ( "," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else "" ) - os.environ[ - "SSH_OPTIONS" - ] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" + os.environ["SSH_OPTIONS"] = ( + f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" + ) def add(self, key): key_pub = self._key_pub(key) diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index bf0281cae68..486bfc25e22 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -30,6 +30,9 @@ def get_additional_envs(check_name: str) -> List[str]: if "azure" in check_name: result.append("USE_AZURE_STORAGE_FOR_MERGE_TREE=1") + if "s3" in check_name: + result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1") + return result diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9deae06d9f4..36620d44a2d 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -16,7 +16,15 @@ from docker_images_helper import get_docker_image, pull_image from env_helper import IS_CI, REPO_COPY, TEMP_PATH, GITHUB_EVENT_PATH from git_helper import GIT_PREFIX, git_runner from pr_info import PRInfo -from report import ERROR, FAILURE, SUCCESS, JobReport, TestResults, read_test_results +from report import ( + ERROR, + FAILURE, + SUCCESS, + JobReport, + TestResults, + read_test_results, + FAIL, +) from ssh import SSHKey from stopwatch import Stopwatch @@ -192,15 +200,6 @@ def main(): future = executor.submit(subprocess.run, cmd_shell, shell=True) _ = future.result() - autofix_description = "" - if args.push: - try: - commit_push_staged(pr_info) - except subprocess.SubprocessError: - # do not fail the whole script if the autofix didn't work out - logging.error("Unable to push the autofix. Continue.") - autofix_description = "Failed to push autofix to the PR. " - subprocess.check_call( f"python3 ../../utils/check-style/process_style_check_result.py --in-results-dir {temp_path} " f"--out-results-file {temp_path}/test_results.tsv --out-status-file {temp_path}/check_status.tsv || " @@ -210,6 +209,21 @@ def main(): state, description, test_results, additional_files = process_result(temp_path) + autofix_description = "" + fail_cnt = 0 + for result in test_results: + if result.status in (FAILURE, FAIL): + # do not autofix if not only black failed + fail_cnt += 1 + + if args.push and fail_cnt == 1: + try: + commit_push_staged(pr_info) + except subprocess.SubprocessError: + # do not fail the whole script if the autofix didn't work out + logging.error("Unable to push the autofix. Continue.") + autofix_description = "Failed to push autofix to the PR. " + JobReport( description=f"{autofix_description}{description}", test_results=test_results, diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index 8251ccbaf38..1b71231f820 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -101,23 +101,20 @@ def main(): assert pr_info.merged_pr, "BUG. merged PR number could not been determined" prs = gh.get_pulls_from_search( - query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr", + query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr is:open", repo="ClickHouse/clickhouse-private", ) - sync_pr = None - if len(prs) > 1: print(f"WARNING: More than one PR found [{prs}] - exiting") elif len(prs) == 0: print("WARNING: No Sync PR found") else: sync_pr = prs[0] - - if args.merge: - merge_sync_pr(gh, sync_pr) - elif args.status: - set_sync_status(gh, pr_info, sync_pr) + if args.merge: + merge_sync_pr(gh, sync_pr) + elif args.status: + set_sync_status(gh, pr_info, sync_pr) if __name__ == "__main__": diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 47247b91858..c901994affa 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 import unittest +import random + from ci_config import CI import ci as CIPY from ci_settings import CiSettings @@ -57,6 +59,18 @@ class TestCIConfig(unittest.TestCase): f"Job [{job}] apparently uses wrong common config with job keyword [{CI.JOB_CONFIGS[job].job_name_keyword}]", ) + def test_job_config_has_proper_values(self): + for job in CI.JobNames: + if CI.JOB_CONFIGS[job].reference_job_name: + reference_job_config = CI.JOB_CONFIGS[ + CI.JOB_CONFIGS[job].reference_job_name + ] + # reference job must run in all workflows and has digest + self.assertTrue(reference_job_config.pr_only == False) + self.assertTrue(reference_job_config.release_only == False) + self.assertTrue(reference_job_config.run_always == False) + self.assertTrue(reference_job_config.digest != CI.DigestConfig()) + def test_required_checks(self): for job in CI.REQUIRED_CHECKS: if job in (CI.StatusNames.PR_CHECK, CI.StatusNames.SYNC): @@ -269,6 +283,7 @@ class TestCIConfig(unittest.TestCase): ci_cache = CIPY._configure_jobs( S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True ) + ci_cache.filter_out_not_affected_jobs() actual_jobs_to_do = list(ci_cache.jobs_to_do) expected_jobs_to_do = [] for set_ in settings.ci_sets: @@ -417,7 +432,7 @@ class TestCIConfig(unittest.TestCase): assert not ci_cache.jobs_to_skip assert not ci_cache.jobs_to_wait - # pretend there are pending jobs that we neet to wait + # pretend there are pending jobs that we need to wait ci_cache.jobs_to_wait = dict(ci_cache.jobs_to_do) for job, config in ci_cache.jobs_to_wait.items(): assert not config.pending_batches @@ -489,3 +504,76 @@ class TestCIConfig(unittest.TestCase): self.assertCountEqual( list(ci_cache.jobs_to_do) + ci_cache.jobs_to_skip, all_jobs_in_wf ) + + def test_ci_py_filters_not_affected_jobs_in_prs(self): + """ + checks ci.py filters not affected jobs in PRs + """ + settings = CiSettings() + settings.no_ci_cache = True + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + pr_info.event_type = EventType.PULL_REQUEST + pr_info.number = 123 + assert pr_info.is_pr + ci_cache = CIPY._configure_jobs( + S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True + ) + self.assertTrue(not ci_cache.jobs_to_skip, "Must be no jobs in skip list") + assert not ci_cache.jobs_to_wait + assert not ci_cache.jobs_to_skip + + MOCK_AFFECTED_JOBS = [ + CI.JobNames.STATELESS_TEST_S3_DEBUG, + CI.JobNames.STRESS_TEST_TSAN, + ] + MOCK_REQUIRED_BUILDS = [] + + # pretend there are pending jobs that we need to wait + for job, job_config in ci_cache.jobs_to_do.items(): + if job in MOCK_AFFECTED_JOBS: + MOCK_REQUIRED_BUILDS += job_config.required_builds + elif job not in MOCK_AFFECTED_JOBS: + ci_cache.jobs_to_wait[job] = job_config + + for job, job_config in ci_cache.jobs_to_do.items(): + if job_config.reference_job_name: + # jobs with reference_job_name in config are not supposed to have records in the cache - continue + continue + if job in MOCK_AFFECTED_JOBS: + continue + for batch in range(job_config.num_batches): + # add any record into cache + record = CiCache.Record( + record_type=random.choice( + [ + CiCache.RecordType.FAILED, + CiCache.RecordType.PENDING, + CiCache.RecordType.SUCCESSFUL, + ] + ), + job_name=job, + job_digest=ci_cache.job_digests[job], + batch=batch, + num_batches=job_config.num_batches, + release_branch=True, + ) + for record_t_, records_ in ci_cache.records.items(): + if record_t_.value == CiCache.RecordType.FAILED.value: + records_[record.to_str_key()] = record + + ci_cache.filter_out_not_affected_jobs() + expected_to_do = ( + [ + CI.JobNames.BUILD_CHECK, + ] + + MOCK_AFFECTED_JOBS + + MOCK_REQUIRED_BUILDS + ) + self.assertCountEqual( + list(ci_cache.jobs_to_wait), + [ + CI.JobNames.BUILD_CHECK, + ] + + MOCK_REQUIRED_BUILDS, + ) + self.assertCountEqual(list(ci_cache.jobs_to_do), expected_to_do) diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index 3f158e79f30..f4d14a17512 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -172,14 +172,10 @@ class TestCIOptions(unittest.TestCase): job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) for job in _TEST_JOB_LIST } - jobs_configs[ - "fuzzers" - ].run_by_label = ( + jobs_configs["fuzzers"].run_by_label = ( "TEST_LABEL" # check "fuzzers" appears in the result due to the label ) - jobs_configs[ - "Integration tests (asan)" - ].release_only = ( + jobs_configs["Integration tests (asan)"].release_only = ( True # still must be included as it's set with include keywords ) filtered_jobs = list( @@ -311,9 +307,9 @@ class TestCIOptions(unittest.TestCase): job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) for job in _TEST_JOB_LIST } - jobs_configs[ - "fuzzers" - ].run_by_label = "TEST_LABEL" # check "fuzzers" does not appears in the result + jobs_configs["fuzzers"].run_by_label = ( + "TEST_LABEL" # check "fuzzers" does not appears in the result + ) jobs_configs["Integration tests (asan)"].release_only = True filtered_jobs = list( ci_options.apply( diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 50263f6ebb6..07a7a9601c0 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -72,6 +72,19 @@ class ClickHouseVersion: return self.patch_update() raise KeyError(f"wrong part {part} is used") + def bump(self) -> "ClickHouseVersion": + if self.minor < 12: + self._minor += 1 + self._revision += 1 + self._patch = 1 + self._tweak = 1 + else: + self._major += 1 + self._revision += 1 + self._patch = 1 + self._tweak = 1 + return self + def major_update(self) -> "ClickHouseVersion": if self._git is not None: self._git.update() @@ -148,6 +161,11 @@ class ClickHouseVersion: """our X.3 and X.8 are LTS""" return self.minor % 5 == 3 + def get_stable_release_type(self) -> str: + if self.is_lts: + return VersionType.LTS + return VersionType.STABLE + def as_dict(self) -> VERSIONS: return { "revision": self.revision, @@ -168,6 +186,7 @@ class ClickHouseVersion: raise ValueError(f"version type {version_type} not in {VersionType.VALID}") self._description = version_type self._describe = f"v{self.string}-{version_type}" + return self def copy(self) -> "ClickHouseVersion": copy = ClickHouseVersion( diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 36870d59c3a..90fb9611151 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -34,10 +34,8 @@ import urllib.parse # for crc32 import zlib from argparse import ArgumentParser -from contextlib import closing from datetime import datetime, timedelta from errno import ESRCH -from queue import Full from subprocess import PIPE, Popen from time import sleep, time from typing import Dict, List, Optional, Set, Tuple, Union @@ -69,7 +67,7 @@ TEST_FILE_EXTENSIONS = [".sql", ".sql.j2", ".sh", ".py", ".expect"] VERSION_PATTERN = r"^((\d+\.)?(\d+\.)?(\d+\.)?\d+)$" -TEST_MAX_RUN_TIME_IN_SECONDS = 120 +TEST_MAX_RUN_TIME_IN_SECONDS = 180 class SharedEngineReplacer: @@ -360,37 +358,14 @@ def clickhouse_execute_json( return rows -class Terminated(KeyboardInterrupt): - pass - - -def signal_handler(sig, frame): - raise Terminated(f"Terminated with {sig} signal") - - def stop_tests(): - global stop_tests_triggered_lock - global stop_tests_triggered - global restarted_tests - - with stop_tests_triggered_lock: - print("Stopping tests") - if not stop_tests_triggered.is_set(): - stop_tests_triggered.set() - - # materialize multiprocessing.Manager().list() object before - # sending SIGTERM since this object is a proxy, that requires - # communicating with manager thread, but after SIGTERM will be - # send, this thread will die, and you will get - # ConnectionRefusedError error for any access to "restarted_tests" - # variable. - restarted_tests = [*restarted_tests] - - # send signal to all processes in group to avoid hung check triggering - # (to avoid terminating clickhouse-test itself, the signal should be ignored) - signal.signal(signal.SIGTERM, signal.SIG_IGN) - os.killpg(os.getpgid(os.getpid()), signal.SIGTERM) - signal.signal(signal.SIGTERM, signal.SIG_DFL) + # send signal to all processes in group to avoid hung check triggering + # (to avoid terminating clickhouse-test itself, the signal should be ignored) + print("Sending signals") + signal.signal(signal.SIGTERM, signal.SIG_IGN) + os.killpg(os.getpgid(os.getpid()), signal.SIGTERM) + signal.signal(signal.SIGTERM, signal.SIG_DFL) + print("Sending signals DONE") def get_db_engine(args, database_name): @@ -700,7 +675,9 @@ class FailureReason(enum.Enum): NO_LONG = "not running long tests" REPLICATED_DB = "replicated-database" NON_ATOMIC_DB = "database engine not Atomic" + OBJECT_STORAGE = "object-storage" S3_STORAGE = "s3-storage" + AZURE_BLOB_STORAGE = "azure-blob-storage" BUILD = "not running for current build" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" SHARED_MERGE_TREE = "no-shared-merge-tree" @@ -735,7 +712,7 @@ def get_localzone(): class SettingsRandomizer: settings = { "max_insert_threads": lambda: ( - 0 if random.random() < 0.5 else random.randint(1, 16) + 12 if random.random() < 0.03 else random.randint(1, 3) ), "group_by_two_level_threshold": threshold_generator(0.2, 0.2, 1, 1000000), "group_by_two_level_threshold_bytes": threshold_generator( @@ -752,7 +729,7 @@ class SettingsRandomizer: "prefer_localhost_replica": lambda: random.randint(0, 1), "max_block_size": lambda: random.randint(8000, 100000), "max_joined_block_size_rows": lambda: random.randint(8000, 100000), - "max_threads": lambda: random.randint(1, 64), + "max_threads": lambda: 32 if random.random() < 0.03 else random.randint(1, 3), "optimize_append_index": lambda: random.randint(0, 1), "optimize_if_chain_to_multiif": lambda: random.randint(0, 1), "optimize_if_transform_strings_to_enum": lambda: random.randint(0, 1), @@ -1226,13 +1203,26 @@ class TestCase: elif tags and ("no-s3-storage" in tags) and args.s3_storage: return FailureReason.S3_STORAGE + elif tags and ("no-azure-blob-storage" in tags) and args.azure_blob_storage: + return FailureReason.AZURE_BLOB_STORAGE elif ( tags - and "no-s3-storage-with-slow-build" in tags - and args.s3_storage + and ("no-object-storage" in tags) + and (args.azure_blob_storage or args.s3_storage) + ): + return FailureReason.OBJECT_STORAGE + elif ( + tags + and "no-object-storage-with-slow-build" in tags + and (args.s3_storage or args.azure_blob_storage) and BuildFlags.RELEASE not in args.build_flags ): - return FailureReason.S3_STORAGE + return FailureReason.OBJECT_STORAGE + + elif "no-batch" in tags and ( + args.run_by_hash_num is not None or args.run_by_hash_total is not None + ): + return FailureReason.SKIP elif tags: for build_flag in args.build_flags: @@ -1464,8 +1454,7 @@ class TestCase: description_full = messages[result.status] description_full += self.print_test_time(result.total_time) if result.reason is not None: - description_full += " - " - description_full += result.reason.value + description_full += f"\nReason: {result.reason.value} " description_full += result.description @@ -1592,10 +1581,11 @@ class TestCase: # pylint:disable-next=consider-using-with; TODO: fix proc = Popen(command, shell=True, env=os.environ, start_new_session=True) - while ( - datetime.now() - start_time - ).total_seconds() < args.timeout and proc.poll() is None: - sleep(0.01) + try: + proc.wait(args.timeout) + except subprocess.TimeoutExpired: + # Whether the test timed out will be decided later + pass debug_log = "" if os.path.exists(self.testcase_args.debug_log_file): @@ -1617,6 +1607,44 @@ class TestCase: # Normalize hostname in stdout file. replace_in_file(self.stdout_file, socket.gethostname(), "localhost") + if os.environ.get("CLICKHOUSE_PORT_TCP"): + replace_in_file( + self.stdout_file, + f"PORT {os.environ['CLICKHOUSE_PORT_TCP']}", + "PORT 9000", + ) + replace_in_file( + self.stdout_file, + f"localhost {os.environ['CLICKHOUSE_PORT_TCP']}", + "localhost 9000", + ) + + if os.environ.get("CLICKHOUSE_PORT_TCP_SECURE"): + replace_in_file( + self.stdout_file, + f"PORT {os.environ['CLICKHOUSE_PORT_TCP_SECURE']}", + "PORT 9440", + ) + replace_in_file( + self.stdout_file, + f"localhost {os.environ['CLICKHOUSE_PORT_TCP_SECURE']}", + "localhost 9440", + ) + + if os.environ.get("CLICKHOUSE_PATH"): + replace_in_file( + self.stdout_file, + os.environ["CLICKHOUSE_PATH"], + "/var/lib/clickhouse", + ) + + if os.environ.get("CLICKHOUSE_PORT_HTTPS"): + replace_in_file( + self.stdout_file, + f"https://localhost:{os.environ['CLICKHOUSE_PORT_HTTPS']}/", + "https://localhost:8443/", + ) + stdout = "" if os.path.exists(self.stdout_file): with open(self.stdout_file, "rb") as stdfd: @@ -1722,7 +1750,7 @@ class TestCase: return TestResult( self.name, TestStatus.FAIL, - FailureReason.INTERNAL_QUERY_FAIL, + FailureReason.TIMEOUT, total_time, self.add_info_about_settings( self.get_description_from_exception_info(sys.exc_info()) @@ -2061,15 +2089,25 @@ class TestSuite: stop_time = None exit_code = None server_died = None -stop_tests_triggered_lock = None -stop_tests_triggered = None -queue = None multiprocessing_manager = None restarted_tests = None -def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): - all_tests, num_tests, test_suite = all_tests_with_params +class ServerDied(Exception): + pass + + +class GlobalTimeout(Exception): + pass + + +def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool]): + ( + all_tests, + num_tests, + test_suite, + is_concurrent, + ) = all_tests_with_params global stop_time global exit_code global server_died @@ -2112,56 +2150,65 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): failures_chain = 0 start_time = datetime.now() - is_concurrent = multiprocessing.current_process().name != "MainProcess" - client_options = get_additional_client_options(args) if num_tests > 0: about = "about " if is_concurrent else "" proc_name = multiprocessing.current_process().name - print(f"\nRunning {about}{num_tests} {test_suite.suite} tests ({proc_name}).\n") + print(f"Running {about}{num_tests} {test_suite.suite} tests ({proc_name}).") while True: - if is_concurrent: - case = queue.get(timeout=args.timeout * 1.1) - if not case: - break + if all_tests: + case = all_tests.pop(0) else: - if all_tests: - case = all_tests.pop(0) - else: - break + break if server_died.is_set(): stop_tests() - break + raise ServerDied("Server died") if stop_time and time() > stop_time: print("\nStop tests run because global time limit is exceeded.\n") stop_tests() - break + raise GlobalTimeout("Stop tests run because global time limit is exceeded") test_case = TestCase(test_suite, case, args, is_concurrent) try: description = "" - test_cace_name = removesuffix(test_case.name, ".gen", ".sql") + ": " - if not is_concurrent: + test_case_name = removesuffix(test_case.name, ".gen", ".sql") + ": " + + if is_concurrent or args.run_sequential_tests_in_parallel: + description = f"{test_case_name:72}" + else: sys.stdout.flush() - sys.stdout.write(f"{test_cace_name:72}") + sys.stdout.write(f"{test_case_name:72}") # This flush is needed so you can see the test name of the long # running test before it will finish. But don't do it in parallel # mode, so that the lines don't mix. sys.stdout.flush() - else: - description = f"{test_cace_name:72}" while True: - test_result = test_case.run( - args, test_suite, client_options, server_logs_level - ) - test_result = test_case.process_result(test_result, MESSAGES) - if not test_result.need_retry: + # This is the upper level timeout + # It helps with completely frozen processes, like in case of gdb errors + def timeout_handler(signum, frame): + raise TimeoutError("Test execution timed out") + + signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(int(args.timeout * 1.1)) + test_result = None + try: + test_result = test_case.run( + args, test_suite, client_options, server_logs_level + ) + test_result = test_case.process_result(test_result, MESSAGES) + break + except TimeoutError: + break + finally: + signal.alarm(0) + + if not test_result or not test_result.need_retry: break restarted_tests.append(test_result) @@ -2181,8 +2228,9 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): failures_total += 1 failures_chain += 1 if test_result.reason == FailureReason.SERVER_DIED: - server_died.set() stop_tests() + server_died.set() + raise ServerDied("Server died") elif test_result.status == TestStatus.SKIPPED: skipped_total += 1 @@ -2193,7 +2241,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): if failures_chain >= args.max_failures_chain: stop_tests() - break + raise ServerDied("Max failures chain") if failures_total > 0: print( @@ -2390,7 +2438,40 @@ def extract_key(key: str) -> str: )[1] -def do_run_tests(jobs, test_suite: TestSuite, parallel): +def override_envs(*args_, **kwargs): + global args + args.client += " --port 19000" + args.http_port = 18123 + args.https_port = 18443 + + updated_env = { + "CLICKHOUSE_CONFIG": "/etc/clickhouse-server3/config.xml", + "CLICKHOUSE_CONFIG_DIR": "/etc/clickhouse-server3", + "CLICKHOUSE_CONFIG_GREP": "/etc/clickhouse-server3/preprocessed/config.xml", + "CLICKHOUSE_USER_FILES": "/var/lib/clickhouse3/user_files", + "CLICKHOUSE_SCHEMA_FILES": "/var/lib/clickhouse3/format_schemas", + "CLICKHOUSE_PATH": "/var/lib/clickhouse3", + "CLICKHOUSE_PORT_TCP": "19000", + "CLICKHOUSE_PORT_TCP_SECURE": "19440", + "CLICKHOUSE_PORT_TCP_WITH_PROXY": "19010", + "CLICKHOUSE_PORT_HTTP": "18123", + "CLICKHOUSE_PORT_HTTPS": "18443", + "CLICKHOUSE_PORT_INTERSERVER": "19009", + "CLICKHOUSE_PORT_KEEPER": "19181", + "CLICKHOUSE_PORT_PROMTHEUS_PORT": "19988", + "CLICKHOUSE_PORT_MYSQL": "19004", + "CLICKHOUSE_PORT_POSTGRESQL": "19005", + } + os.environ.update(updated_env) + + run_tests_array(*args_, **kwargs) + + +def run_tests_process(*args, **kwargs): + return run_tests_array(*args, **kwargs) + + +def do_run_tests(jobs, test_suite: TestSuite): if jobs > 1 and len(test_suite.parallel_tests) > 0: print( "Found", @@ -2399,19 +2480,8 @@ def do_run_tests(jobs, test_suite: TestSuite, parallel): len(test_suite.sequential_tests), "sequential tests", ) - run_n, run_total = parallel.split("/") - run_n = float(run_n) - run_total = float(run_total) tests_n = len(test_suite.parallel_tests) - run_total = min(run_total, tests_n) - jobs = min(jobs, tests_n) - run_total = max(jobs, run_total) - - batch_size = max(1, len(test_suite.parallel_tests) // jobs) - parallel_tests_array = [] - for _ in range(jobs): - parallel_tests_array.append((None, batch_size, test_suite)) # If we don't do random shuffling then there will be always # nearly the same groups of test suites running concurrently. @@ -2424,33 +2494,91 @@ def do_run_tests(jobs, test_suite: TestSuite, parallel): # of failures will be nearly the same for all tests from the group. random.shuffle(test_suite.parallel_tests) - try: - with closing(multiprocessing.Pool(processes=jobs)) as pool: - pool.map_async(run_tests_array, parallel_tests_array) + batch_size = max(1, (len(test_suite.parallel_tests) // jobs) + 1) + parallel_tests_array = [] + for job in range(jobs): + range_ = job * batch_size, job * batch_size + batch_size + batch = test_suite.parallel_tests[range_[0] : range_[1]] + parallel_tests_array.append((batch, batch_size, test_suite, True)) - for suit in test_suite.parallel_tests: - queue.put(suit, timeout=args.timeout * 1.1) + processes = [] - for _ in range(jobs): - queue.put(None, timeout=args.timeout * 1.1) - - queue.close() - except Full: - print( - "Couldn't put test to the queue within timeout. Server probably hung." + for test_batch in parallel_tests_array: + process = multiprocessing.Process( + target=run_tests_process, args=(test_batch,) ) - print_stacktraces() - queue.close() + processes.append(process) + process.start() - pool.join() + if args.run_sequential_tests_in_parallel: + # Run parallel tests and sequential tests at the same time + # Sequential tests will use different ClickHouse instance + # In this process we can safely override values in `args` and `os.environ` + process = multiprocessing.Process( + target=override_envs, + args=( + ( + test_suite.sequential_tests, + len(test_suite.sequential_tests), + test_suite, + False, + ), + ), + ) + processes.append(process) + process.start() - run_tests_array( - (test_suite.sequential_tests, len(test_suite.sequential_tests), test_suite) - ) + while processes: + sys.stdout.flush() + # Periodically check the server for hangs + # and stop all processes in this case + try: + clickhouse_execute( + args, + query="SELECT 1 /*hang up check*/", + max_http_retries=5, + timeout=20, + ) + except Exception: + print("Hang up check failed") + server_died.set() + + if server_died.is_set(): + print("Server died, terminating all processes...") + kill_gdb_if_any() + # Wait for test results + sleep(args.timeout) + for p in processes: + if p.is_alive(): + p.terminate() + break + + for p in processes[:]: + if not p.is_alive(): + processes.remove(p) + + sleep(5) + + if not args.run_sequential_tests_in_parallel: + run_tests_array( + ( + test_suite.sequential_tests, + len(test_suite.sequential_tests), + test_suite, + False, + ) + ) return len(test_suite.sequential_tests) + len(test_suite.parallel_tests) else: num_tests = len(test_suite.all_tests) - run_tests_array((test_suite.all_tests, num_tests, test_suite)) + run_tests_array( + ( + test_suite.all_tests, + num_tests, + test_suite, + False, + ) + ) return num_tests @@ -2755,6 +2883,7 @@ def main(args): f"{get_db_engine(args, db_name)}", settings=get_create_database_settings(args, None), ) + break except HTTPError as e: total_time = (datetime.now() - start_time).total_seconds() if not need_retry(args, e.message, e.message, total_time): @@ -2807,7 +2936,7 @@ def main(args): test_suite.cloud_skip_list = cloud_skip_list test_suite.private_skip_list = private_skip_list - total_tests_run += do_run_tests(args.jobs, test_suite, args.parallel) + total_tests_run += do_run_tests(args.jobs, test_suite) if server_died.is_set(): exit_code.value = 1 @@ -3099,6 +3228,12 @@ def parse_args(): default=False, help="Run tests over s3 storage", ) + parser.add_argument( + "--azure-blob-storage", + action="store_true", + default=False, + help="Run tests over azure blob storage", + ) parser.add_argument( "--no-random-settings", action="store_true", @@ -3261,16 +3396,30 @@ def parse_args(): help="Replace ordinary MergeTree engine with SharedMergeTree", ) + parser.add_argument( + "--run-sequential-tests-in-parallel", + action="store_true", + default=False, + help="If `true`, tests with the tag `no-parallel` will run on a " + "separate ClickHouse instance in parallel with other tests. " + "This is used in CI to make test jobs run faster.", + ) + return parser.parse_args() +class Terminated(KeyboardInterrupt): + pass + + +def signal_handler(sig, frame): + raise Terminated(f"Terminated with {sig} signal") + + if __name__ == "__main__": stop_time = None exit_code = multiprocessing.Value("i", 0) server_died = multiprocessing.Event() - stop_tests_triggered_lock = multiprocessing.Lock() - stop_tests_triggered = multiprocessing.Event() - queue = multiprocessing.Queue(maxsize=1) multiprocessing_manager = multiprocessing.Manager() restarted_tests = multiprocessing_manager.list() diff --git a/tests/config/config.d/grpc_protocol.xml b/tests/config/config.d/grpc_protocol.xml new file mode 100644 index 00000000000..b957618120d --- /dev/null +++ b/tests/config/config.d/grpc_protocol.xml @@ -0,0 +1,3 @@ + + 9100 + diff --git a/tests/config/install.sh b/tests/config/install.sh index 08ee11a7407..1b0edc5fc16 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -27,6 +27,7 @@ ln -sf $SRC_PATH/config.d/secure_ports.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/clusters.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/graphite.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/graphite_alternative.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/grpc_protocol.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/max_concurrent_queries.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/merge_tree_settings.xml $DEST_SERVER_PATH/config.d/ @@ -57,7 +58,6 @@ ln -sf $SRC_PATH/config.d/forbidden_headers.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_keeper_map.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/custom_disks_base_path.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/display_name.xml $DEST_SERVER_PATH/config.d/ -ln -sf $SRC_PATH/config.d/reverse_dns_query_function.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/compressed_marks_and_index.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/disable_s3_env_credentials.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_wait_for_shutdown_replicated_tables.xml $DEST_SERVER_PATH/config.d/ diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 544b06cca1b..548b58a17e8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -73,7 +73,7 @@ CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.lo # Minimum version we use in integration tests to check compatibility with old releases # Keep in mind that we only support upgrading between releases that are at most 1 year different. # This means that this minimum need to be, at least, 1 year older than the current release -CLICKHOUSE_CI_MIN_TESTED_VERSION = "22.8" +CLICKHOUSE_CI_MIN_TESTED_VERSION = "23.3" # to create docker-compose env file @@ -1454,9 +1454,9 @@ class ClickHouseCluster: def setup_azurite_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_azurite = True env_variables["AZURITE_PORT"] = str(self.azurite_port) - env_variables[ - "AZURITE_STORAGE_ACCOUNT_URL" - ] = f"http://azurite1:{env_variables['AZURITE_PORT']}/devstoreaccount1" + env_variables["AZURITE_STORAGE_ACCOUNT_URL"] = ( + f"http://azurite1:{env_variables['AZURITE_PORT']}/devstoreaccount1" + ) env_variables["AZURITE_CONNECTION_STRING"] = ( f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" @@ -1653,9 +1653,9 @@ class ClickHouseCluster: # Code coverage files will be placed in database directory # (affect only WITH_COVERAGE=1 build) - env_variables[ - "LLVM_PROFILE_FILE" - ] = "/var/lib/clickhouse/server_%h_%p_%m.profraw" + env_variables["LLVM_PROFILE_FILE"] = ( + "/var/lib/clickhouse/server_%h_%p_%m.profraw" + ) clickhouse_start_command = CLICKHOUSE_START_COMMAND if clickhouse_log_file: @@ -1668,9 +1668,9 @@ class ClickHouseCluster: cluster=self, base_path=self.base_dir, name=name, - base_config_dir=base_config_dir - if base_config_dir - else self.base_config_dir, + base_config_dir=( + base_config_dir if base_config_dir else self.base_config_dir + ), custom_main_configs=main_configs or [], custom_user_configs=user_configs or [], custom_dictionaries=dictionaries or [], diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 33dd85aceaf..3c3d1b6cc96 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -48,6 +48,7 @@ "test_system_metrics/test.py::test_readonly_metrics", "test_system_replicated_fetches/test.py::test_system_replicated_fetches", "test_zookeeper_config_load_balancing/test.py::test_round_robin", + "test_zookeeper_config_load_balancing/test.py::test_az", "test_zookeeper_fallback_session/test.py::test_fallback_session", "test_global_overcommit_tracker/test.py::test_global_overcommit", diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/__init__.py b/tests/integration/test_analyzer_compatibility/__init__.py similarity index 100% rename from tests/integration/test_host_regexp_multiple_ptr_records_concurrent/__init__.py rename to tests/integration/test_analyzer_compatibility/__init__.py diff --git a/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml b/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml new file mode 100644 index 00000000000..0a50dab7fd3 --- /dev/null +++ b/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml @@ -0,0 +1,17 @@ + + + + + true + + current + 9000 + + + backward + 9000 + + + + + diff --git a/tests/integration/test_analyzer_compatibility/test.py b/tests/integration/test_analyzer_compatibility/test.py new file mode 100644 index 00000000000..d4ded420c61 --- /dev/null +++ b/tests/integration/test_analyzer_compatibility/test.py @@ -0,0 +1,100 @@ +import uuid + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +CLICKHOUSE_MAX_VERSION_WITH_ANALYZER_DISABLED_BY_DEFAULT = "24.2" + +cluster = ClickHouseCluster(__file__) +# Here analyzer is enabled by default +current = cluster.add_instance( + "current", + main_configs=["configs/remote_servers.xml"], +) +# Here analyzer is disabled by default +backward = cluster.add_instance( + "backward", + use_old_analyzer=True, + main_configs=["configs/remote_servers.xml"], + image="clickhouse/clickhouse-server", + tag=CLICKHOUSE_MAX_VERSION_WITH_ANALYZER_DISABLED_BY_DEFAULT, + with_installed_binary=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_two_new_versions(start_cluster): + # Two new versions (both know about the analyzer) + # One have it enabled by default, another one - disabled. + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + query_id = str(uuid.uuid4()) + current.query( + "SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables);", + query_id=query_id, + ) + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + assert ( + current.query( + """ +SELECT hostname() AS h, getSetting('allow_experimental_analyzer') +FROM clusterAllReplicas('test_cluster_mixed', system.one) +ORDER BY h;""" + ) + == TSV([["backward", "true"], ["current", "true"]]) + ) + + # Should be enabled everywhere + analyzer_enabled = current.query( + f""" +SELECT +DISTINCT Settings['allow_experimental_analyzer'] +FROM clusterAllReplicas('test_cluster_mixed', system.query_log) +WHERE initial_query_id = '{query_id}';""" + ) + + assert TSV(analyzer_enabled) == TSV("1") + + query_id = str(uuid.uuid4()) + backward.query( + "SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables)", + query_id=query_id, + ) + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + assert ( + backward.query( + """ +SELECT hostname() AS h, getSetting('allow_experimental_analyzer') +FROM clusterAllReplicas('test_cluster_mixed', system.one) +ORDER BY h;""" + ) + == TSV([["backward", "false"], ["current", "false"]]) + ) + + # Should be disabled everywhere + analyzer_enabled = backward.query( + f""" +SELECT +DISTINCT Settings['allow_experimental_analyzer'] +FROM clusterAllReplicas('test_cluster_mixed', system.query_log) +WHERE initial_query_id = '{query_id}';""" + ) + + assert TSV(analyzer_enabled) == TSV("0") diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index 758dda655da..fc03a77030e 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -130,10 +130,13 @@ def test_string_functions(start_cluster): functions = map(lambda x: x.strip(), functions) excludes = [ + # The argument of this function is not a seed, but an arbitrary expression needed for bypassing common subexpression elimination. "rand", "rand64", "randConstant", + "randCanonical", "generateUUIDv4", + "generateULID", # Syntax error otherwise "position", "substring", @@ -153,6 +156,18 @@ def test_string_functions(start_cluster): "tryBase64Decode", # Removed in 23.9 "meiliMatch", + # These functions require more than one argument. + "parseDateTimeInJodaSyntaxOrZero", + "parseDateTimeInJodaSyntaxOrNull", + "parseDateTimeOrNull", + "parseDateTimeOrZero", + "parseDateTime", + # The argument is effectively a disk name (and we don't have one with name foo) + "filesystemUnreserved", + "filesystemCapacity", + "filesystemAvailable", + # Exclude it for now. Looks like the result depends on the build type. + "farmHash64", ] functions = filter(lambda x: x not in excludes, functions) @@ -205,6 +220,9 @@ def test_string_functions(start_cluster): # Function X takes exactly one parameter: # The function 'X' can only be used as a window function "BAD_ARGUMENTS", + # String foo is obviously not a valid IP address. + "CANNOT_PARSE_IPV4", + "CANNOT_PARSE_IPV6", ] if any(map(lambda x: x in error_message, allowed_errors)): logging.info("Skipping %s", function) diff --git a/tests/integration/test_config_substitutions/configs/000-server_overrides.xml b/tests/integration/test_config_substitutions/configs/000-server_overrides.xml new file mode 100644 index 00000000000..9335f663d68 --- /dev/null +++ b/tests/integration/test_config_substitutions/configs/000-server_overrides.xml @@ -0,0 +1,3 @@ + + 10000 + diff --git a/tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml b/tests/integration/test_config_substitutions/configs/000-users_with_env_subst.xml similarity index 100% rename from tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml rename to tests/integration/test_config_substitutions/configs/000-users_with_env_subst.xml diff --git a/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml b/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml new file mode 100644 index 00000000000..ea91f066a21 --- /dev/null +++ b/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/integration/test_config_substitutions/test.py b/tests/integration/test_config_substitutions/test.py index faceab6fbcd..124dbcaedf7 100644 --- a/tests/integration/test_config_substitutions/test.py +++ b/tests/integration/test_config_substitutions/test.py @@ -39,9 +39,13 @@ node6 = cluster.add_instance( node7 = cluster.add_instance( "node7", user_configs=[ - "configs/000-config_with_env_subst.xml", + "configs/000-users_with_env_subst.xml", "configs/010-env_subst_override.xml", ], + main_configs=[ + "configs/000-server_overrides.xml", + "configs/010-server_with_env_subst.xml", + ], env_variables={ # overridden with 424242 "MAX_QUERY_SIZE": "121212", @@ -126,9 +130,9 @@ def test_config(start_cluster): ) -def test_config_invalid_overrides(start_cluster): +def test_config_from_env_overrides(start_cluster): node7.replace_config( - "/etc/clickhouse-server/users.d/000-config_with_env_subst.xml", + "/etc/clickhouse-server/users.d/000-users_with_env_subst.xml", """ @@ -156,7 +160,7 @@ def test_config_invalid_overrides(start_cluster): ): node7.query("SYSTEM RELOAD CONFIG") node7.replace_config( - "/etc/clickhouse-server/users.d/000-config_with_env_subst.xml", + "/etc/clickhouse-server/users.d/000-users_with_env_subst.xml", """ @@ -181,6 +185,24 @@ def test_config_invalid_overrides(start_cluster): node7.query("SYSTEM RELOAD CONFIG") +def test_config_merge_from_env_overrides(start_cluster): + assert ( + node7.query( + "SELECT value FROM system.server_settings WHERE name='max_thread_pool_size'" + ) + == "10000\n" + ) + node7.replace_config( + "/etc/clickhouse-server/config.d/010-server_with_env_subst.xml", + """ + + 9000 + +""", + ) + node7.query("SYSTEM RELOAD CONFIG") + + def test_include_config(start_cluster): # assert node4.query("select 1") diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index 1cc5048eb69..a5e2456ef4f 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -19,9 +19,9 @@ def cluster(): cluster = ClickHouseCluster(__file__) cluster.add_instance( "node", - main_configs=["configs/storage_arm.xml"] - if is_arm() - else ["configs/storage_amd.xml"], + main_configs=( + ["configs/storage_arm.xml"] if is_arm() else ["configs/storage_amd.xml"] + ), with_minio=True, with_hdfs=not is_arm(), ) diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index 97d5da787cd..56ea5c8846a 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -9,7 +9,9 @@ def started_cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance( - "disks_app_test", main_configs=["config.xml"], with_minio=True + "disks_app_test", + main_configs=["config.xml"], + with_minio=True, ) cluster.start() @@ -47,12 +49,18 @@ def test_disks_app_func_ld(started_cluster): source = cluster.instances["disks_app_test"] out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "list-disks"] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--query", "list-disks"] ) - disks = out.split("\n") + disks = list( + sorted( + map( + lambda x: x.split(":")[0], filter(lambda x: len(x) > 1, out.split("\n")) + ) + ) + ) - assert disks[0] == "default" and disks[1] == "test1" and disks[2] == "test2" + assert disks[:4] == ["default", "local", "test1", "test2"] def test_disks_app_func_ls(started_cluster): @@ -61,7 +69,15 @@ def test_disks_app_func_ls(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -75,9 +91,8 @@ def test_disks_app_func_ls(started_cluster): "--save-logs", "--disk", "test1", - "list", - ".", - "--recursive", + "--query", + "list . --recursive", ] ) @@ -102,8 +117,8 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test1", - "write", - "path1", + "--query", + "'write path1'", ] ), ] @@ -113,18 +128,21 @@ def test_disks_app_func_cp(started_cluster): [ "/usr/bin/clickhouse", "disks", - "copy", - "--disk-from", - "test1", - "--disk-to", - "test2", - ".", - ".", + "--query", + "copy --recursive --disk-from test1 --disk-to test2 . .", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path1" in out @@ -136,8 +154,8 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test2", - "remove", - "path1", + "--query", + "remove path1", ] ) @@ -148,21 +166,37 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test1", - "remove", - "path1", + "--query", + "remove path1", ] ) # alesapin: Why we need list one more time? # kssenii: it is an assertion that the file is indeed deleted out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path1" not in out out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) assert "path1" not in out @@ -177,14 +211,13 @@ def test_disks_app_func_ln(started_cluster): [ "/usr/bin/clickhouse", "disks", - "link", - "data/default/test_table", - "data/default/z_tester", + "--query", + "link data/default/test_table data/default/z_tester", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "list", "data/default/"] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--query", "list data/default/"] ) files = out.split("\n") @@ -209,15 +242,23 @@ def test_disks_app_func_rm(started_cluster): "--save-logs", "--disk", "test2", - "write", - "path3", + "--query", + "'write path3'", ] ), ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path3" in out @@ -229,13 +270,21 @@ def test_disks_app_func_rm(started_cluster): "--save-logs", "--disk", "test2", - "remove", - "path3", + "--query", + "remove path3", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path3" not in out @@ -247,7 +296,15 @@ def test_disks_app_func_mv(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -260,14 +317,21 @@ def test_disks_app_func_mv(started_cluster): "disks", "--disk", "test1", - "move", - "store", - "old_store", + "--query", + "move store old_store", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -290,8 +354,8 @@ def test_disks_app_func_read_write(started_cluster): "--save-logs", "--disk", "test1", - "write", - "5.txt", + "--query", + "'write 5.txt'", ] ), ] @@ -304,8 +368,8 @@ def test_disks_app_func_read_write(started_cluster): "--save-logs", "--disk", "test1", - "read", - "5.txt", + "--query", + "read 5.txt", ] ) @@ -319,7 +383,15 @@ def test_remote_disk_list(started_cluster): init_data_s3(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test3", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test3", + "--query", + "list .", + ] ) files = out.split("\n") @@ -333,9 +405,8 @@ def test_remote_disk_list(started_cluster): "--save-logs", "--disk", "test3", - "list", - ".", - "--recursive", + "--query", + "list . --recursive", ] ) diff --git a/tests/integration/test_disks_app_interactive/__init__.py b/tests/integration/test_disks_app_interactive/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_disks_app_interactive/configs/config.xml b/tests/integration/test_disks_app_interactive/configs/config.xml new file mode 100644 index 00000000000..bcbb107f0a2 --- /dev/null +++ b/tests/integration/test_disks_app_interactive/configs/config.xml @@ -0,0 +1,3 @@ + + /var/lib/clickhouse/ + \ No newline at end of file diff --git a/tests/integration/test_disks_app_interactive/test.py b/tests/integration/test_disks_app_interactive/test.py new file mode 100644 index 00000000000..ca4ba5d9065 --- /dev/null +++ b/tests/integration/test_disks_app_interactive/test.py @@ -0,0 +1,331 @@ +from helpers.cluster import ClickHouseCluster + +import pytest + +import pathlib + +import subprocess +import select +import io +from typing import List, Tuple, Dict, Union, Optional + +import os + + +class ClickHouseDisksException(Exception): + pass + + +@pytest.fixture(scope="module") +def started_cluster(): + global cluster + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "disks_app_test", + main_configs=["server_configs/config.xml"], + with_minio=True, + ) + + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +class DisksClient(object): + SEPARATOR = b"\a\a\a\a\n" + local_client: Optional["DisksClient"] = None # static variable + default_disk_root_directory: str = "/var/lib/clickhouse" + + def __init__(self, bin_path: str, config_path: str, working_path: str): + self.bin_path = bin_path + self.working_path = working_path + + self.proc = subprocess.Popen( + [bin_path, "disks", "--test-mode", "--config", config_path], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + self.poller = select.epoll() + self.poller.register(self.proc.stdout) + self.poller.register(self.proc.stderr) + + self.stopped = False + + self._fd_nums = { + self.proc.stdout.fileno(): self.proc.stdout, + self.proc.stderr.fileno(): self.proc.stderr, + } + + def execute_query(self, query: str, timeout: float = 5.0) -> str: + output = io.BytesIO() + + self.proc.stdin.write(query.encode() + b"\n") + self.proc.stdin.flush() + + events = self.poller.poll(timeout) + if not events: + raise TimeoutError(f"Disks client returned no output") + + for fd_num, event in events: + if event & (select.EPOLLIN | select.EPOLLPRI): + file = self._fd_nums[fd_num] + + if file == self.proc.stdout: + while True: + chunk = file.readline() + if chunk.endswith(self.SEPARATOR): + break + + output.write(chunk) + + elif file == self.proc.stderr: + error_line = self.proc.stderr.readline() + print(error_line) + raise ClickHouseDisksException(error_line.strip().decode()) + + else: + raise ValueError(f"Failed to read from pipe. Flag {event}") + + data = output.getvalue().strip().decode() + return data + + def list_disks(self) -> List[Tuple[str, str]]: + output = self.execute_query("list-disks") + return list( + sorted( + map( + lambda x: (x.split(":")[0], ":".join(x.split(":")[1:])), + output.split("\n"), + ) + ) + ) + + def current_disk_with_path(self) -> Tuple[str, str]: + output = self.execute_query("current_disk_with_path") + disk_line = output.split("\n")[0] + path_line = output.split("\n")[1] + assert disk_line.startswith("Disk: ") + assert path_line.startswith("Path: ") + return disk_line[6:], path_line[6:] + + def ls( + self, path: str, recursive: bool = False, show_hidden: bool = False + ) -> Union[List[str], Dict[str, List[str]]]: + recursive_adding = "--recursive " if recursive else "" + show_hidden_adding = "--all " if show_hidden else "" + output = self.execute_query( + f"list {path} {recursive_adding} {show_hidden_adding}" + ) + if recursive: + answer: Dict[str, List[str]] = dict() + blocks = output.split("\n\n") + for block in blocks: + directory = block.split("\n")[0][:-1] + files = block.split("\n")[1:] + answer[directory] = files + return answer + else: + return output.split("\n") + + def switch_disk(self, disk: str, directory: Optional[str] = None): + directory_addition = f"--path {directory} " if directory is not None else "" + self.execute_query(f"switch-disk {disk} {directory_addition}") + + def cd(self, directory: str, disk: Optional[str] = None): + disk_addition = f"--disk {disk} " if disk is not None else "" + self.execute_query(f"cd {directory} {disk_addition}") + + def copy( + self, + path_from, + path_to, + disk_from: Optional[str] = None, + disk_to: Optional[str] = None, + recursive: bool = False, + ): + disk_from_option = f"--disk-from {disk_from} " if disk_from is not None else "" + disk_to_option = f"--disk-to {disk_to} " if disk_to is not None else "" + recursive_tag = "--recursive" if recursive else "" + + self.execute_query( + f"copy {recursive_tag} {path_from} {path_to} {disk_from_option} {disk_to_option}" + ) + + def move(self, path_from: str, path_to: str): + self.execute_query(f"move {path_from} {path_to}") + + def rm(self, path: str, recursive: bool = False): + recursive_tag = "--recursive" if recursive else "" + self.execute_query(f"rm {recursive_tag} {path}") + + def mkdir(self, path: str, recursive: bool = False): + recursive_adding = "--recursive " if recursive else "" + self.execute_query(f"mkdir {path} {recursive_adding}") + + def ln(self, path_from: str, path_to: str): + self.execute_query(f"link {path_from} {path_to}") + + def read(self, path_from: str, path_to: Optional[str] = None): + path_to_adding = f"--path-to {path_to} " if path_to is not None else "" + output = self.execute_query(f"read {path_from} {path_to_adding}") + return output + + def write( + self, path_from: str, path_to: str + ): # Writing from stdin is difficult to test (do not know how to do this in python) + path_from_adding = f"--path-from {path_from}" + self.execute_query(f"write {path_from_adding} {path_to}") + + @staticmethod + def getLocalDisksClient(refresh: bool): + if (DisksClient.local_client is None) or refresh: + binary_file = os.environ.get("CLICKHOUSE_TESTS_SERVER_BIN_PATH") + current_working_directory = str(pathlib.Path().resolve()) + config_file = f"{current_working_directory}/test_disks_app_interactive/configs/config.xml" + if not os.path.exists(DisksClient.default_disk_root_directory): + os.mkdir(DisksClient.default_disk_root_directory) + + DisksClient.local_client = DisksClient( + binary_file, config_file, current_working_directory + ) + return DisksClient.local_client + else: + return DisksClient.local_client + + +def test_disks_app_interactive_list_disks(): + client = DisksClient.getLocalDisksClient(True) + expected_disks_with_path = [ + ("default", "/"), + ("local", client.working_path), + ] + assert expected_disks_with_path == client.list_disks() + assert client.current_disk_with_path() == ("default", "/") + client.switch_disk("local") + assert client.current_disk_with_path() == ( + "local", + client.working_path, + ) + + +def test_disks_app_interactive_list_files_local(): + client = DisksClient.getLocalDisksClient(True) + client.switch_disk("local") + excepted_listed_files = sorted(os.listdir("test_disks_app_interactive/")) + listed_files = sorted(client.ls("test_disks_app_interactive/")) + assert excepted_listed_files == listed_files + + +def test_disks_app_interactive_list_directories_default(): + client = DisksClient.getLocalDisksClient(True) + traversed_dir = client.ls(".", recursive=True) + client.mkdir("dir1") + client.mkdir("dir2") + client.mkdir(".dir3") + client.cd("dir1") + client.mkdir("dir11") + client.mkdir(".dir12") + client.mkdir("dir13") + client.cd("../dir2") + client.mkdir("dir21") + client.mkdir("dir22") + client.mkdir(".dir23") + client.cd("../.dir3") + client.mkdir("dir31") + client.mkdir(".dir32") + client.cd("..") + traversed_dir = client.ls(".", recursive=True) + assert traversed_dir == { + ".": ["dir1", "dir2"], + "./dir1": ["dir11", "dir13"], + "./dir2": ["dir21", "dir22"], + "./dir1/dir11": [], + "./dir1/dir13": [], + "./dir2/dir21": [], + "./dir2/dir22": [], + } + traversed_dir = client.ls(".", recursive=True, show_hidden=True) + assert traversed_dir == { + ".": [".dir3", "dir1", "dir2"], + "./dir1": [".dir12", "dir11", "dir13"], + "./dir2": [".dir23", "dir21", "dir22"], + "./.dir3": [".dir32", "dir31"], + "./dir1/dir11": [], + "./dir1/.dir12": [], + "./dir1/dir13": [], + "./dir2/dir21": [], + "./dir2/dir22": [], + "./dir2/.dir23": [], + "./.dir3/dir31": [], + "./.dir3/.dir32": [], + } + client.rm("dir2", recursive=True) + traversed_dir = client.ls(".", recursive=True, show_hidden=True) + assert traversed_dir == { + ".": [".dir3", "dir1"], + "./dir1": [".dir12", "dir11", "dir13"], + "./.dir3": [".dir32", "dir31"], + "./dir1/dir11": [], + "./dir1/.dir12": [], + "./dir1/dir13": [], + "./.dir3/dir31": [], + "./.dir3/.dir32": [], + } + traversed_dir = client.ls(".", recursive=True, show_hidden=False) + assert traversed_dir == { + ".": ["dir1"], + "./dir1": ["dir11", "dir13"], + "./dir1/dir11": [], + "./dir1/dir13": [], + } + client.rm("dir1", recursive=True) + client.rm(".dir3", recursive=True) + assert client.ls(".", recursive=True, show_hidden=False) == {".": []} + + +def test_disks_app_interactive_cp_and_read(): + initial_text = "File content" + with open("a.txt", "w") as file: + file.write(initial_text) + client = DisksClient.getLocalDisksClient(True) + client.switch_disk("default") + client.copy("a.txt", "/a.txt", disk_from="local", disk_to="default") + read_text = client.read("a.txt") + assert initial_text == read_text + client.mkdir("dir1") + client.copy("a.txt", "/dir1/b.txt", disk_from="local", disk_to="default") + read_text = client.read("a.txt", path_to="dir1/b.txt") + assert "" == read_text + read_text = client.read("/dir1/b.txt") + assert read_text == initial_text + with open(f"{DisksClient.default_disk_root_directory}/dir1/b.txt", "r") as file: + read_text = file.read() + assert read_text == initial_text + os.remove("a.txt") + client.rm("a.txt") + client.rm("/dir1", recursive=True) + + +def test_disks_app_interactive_test_move_and_write(): + initial_text = "File content" + with open("a.txt", "w") as file: + file.write(initial_text) + client = DisksClient.getLocalDisksClient(True) + client.switch_disk("default") + client.copy("a.txt", "/a.txt", disk_from="local", disk_to="default") + files = client.ls(".") + assert files == ["a.txt"] + client.move("a.txt", "b.txt") + files = client.ls(".") + assert files == ["b.txt"] + read_text = client.read("/b.txt") + assert read_text == initial_text + client.write("b.txt", "c.txt") + read_text = client.read("c.txt") + assert read_text == initial_text + os.remove("a.txt") diff --git a/tests/integration/test_distributed_default_database/test.py b/tests/integration/test_distributed_default_database/test.py index ef69533416b..7da9a368997 100644 --- a/tests/integration/test_distributed_default_database/test.py +++ b/tests/integration/test_distributed_default_database/test.py @@ -5,6 +5,7 @@ in this test we write into per-node tables and read from the distributed table. The default database in the distributed table definition is left empty on purpose to test default database deduction. """ + import pytest from helpers.client import QueryRuntimeException diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 50d7be4d11e..7ecb2cda257 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -7,7 +7,7 @@ import uuid import time from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION +from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) @@ -27,9 +27,6 @@ def make_instance(name, *args, **kwargs): ) -# DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits -assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" - # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user n1 = make_instance( @@ -38,14 +35,6 @@ n1 = make_instance( user_configs=["configs/users.d/new_user.xml"], ) n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) -backward = make_instance( - "backward", - main_configs=["configs/remote_servers_backward.xml"], - image="clickhouse/clickhouse-server", - # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 - tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, - with_installed_binary=True, -) users = pytest.mark.parametrize( "user,password", @@ -427,28 +416,6 @@ def test_per_user_protocol_settings_secure_cluster(user, password): ) -@users -def test_user_secure_cluster_with_backward(user, password): - id_ = "with-backward-query-dist_secure-" + user - n1.query( - f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password - ) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - -@users -def test_user_secure_cluster_from_backward(user, password): - id_ = "from-backward-query-dist_secure-" + user - backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - assert n1.contains_in_log( - "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." - ) - - def test_secure_cluster_distributed_over_distributed_different_users(): # This works because we will have initial_user='default' n1.query( diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 851da99acf3..328ba3bc05c 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -39,6 +39,7 @@ node = cluster.add_instance( "TSAN_OPTIONS": "report_atomic_races=0 " + os.getenv("TSAN_OPTIONS", default="") }, ipv6_address=IPV6_ADDRESS, + stay_alive=True, ) main_channel = None @@ -369,47 +370,33 @@ def test_progress(): "SELECT number, sleep(0.31) FROM numbers(8) SETTINGS max_block_size=2, interactive_delay=100000", stream_output=True, ) - results = list(results) - for result in results: - result.time_zone = "" - result.query_id = "" - # print(results) - # Note: We can't convert those messages to string like `results = str(results)` and then compare it as a string - # because str() can serialize a protobuf message with any order of fields. - expected_results = [ - clickhouse_grpc_pb2.Result( - output_format="TabSeparated", - progress=clickhouse_grpc_pb2.Progress( - read_rows=2, read_bytes=16, total_rows_to_read=8 - ), - ), - clickhouse_grpc_pb2.Result(output=b"0\t0\n1\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"2\t0\n3\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"4\t0\n5\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"6\t0\n7\t0\n"), - clickhouse_grpc_pb2.Result( - stats=clickhouse_grpc_pb2.Stats( - rows=8, - blocks=4, - allocated_bytes=1092, - ) - ), + # Note: We can't compare results using a statement like `assert results == expected_results` + # because `results` can come in slightly different order. + # So we compare `outputs` and `progresses` separately and not `results` as a whole. + + outputs = [i.output for i in results if i.output] + progresses = [i.progress for i in results if i.HasField("progress")] + + # print(outputs) + # print(progresses) + + expected_outputs = [ + b"0\t0\n1\t0\n", + b"2\t0\n3\t0\n", + b"4\t0\n5\t0\n", + b"6\t0\n7\t0\n", ] - # Stats data can be returned, which broke the test - results = [i for i in results if not isinstance(i, clickhouse_grpc_pb2.Stats)] + expected_progresses = [ + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16, total_rows_to_read=8), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + ] - assert results == expected_results + assert outputs == expected_outputs + assert progresses == expected_progresses def test_session_settings(): @@ -763,3 +750,9 @@ def test_opentelemetry_context_propagation(): ) == "SELECT 1\tsome custom state\n" ) + + +def test_restart(): + assert query("SELECT 1") == "1\n" + node.restart_clickhouse() + assert query("SELECT 2") == "2\n" diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml deleted file mode 100644 index 42a1f962705..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml +++ /dev/null @@ -1,4 +0,0 @@ - - 1 - 250 - diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml deleted file mode 100644 index 9329c8dbde2..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - test1\.example\.com$ - - default - - - diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml deleted file mode 100644 index 9c27c612f63..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml +++ /dev/null @@ -1,5 +0,0 @@ - - :: - 0.0.0.0 - 1 - diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile deleted file mode 100644 index 3edf37dafa5..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile +++ /dev/null @@ -1,8 +0,0 @@ -. { - hosts /example.com { - reload "20ms" - fallthrough - } - forward . 127.0.0.11 - log -} diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com deleted file mode 100644 index 9beb415c290..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com +++ /dev/null @@ -1 +0,0 @@ -filled in runtime, but needs to exist in order to be volume mapped in docker \ No newline at end of file diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py deleted file mode 100644 index 70419f95dd3..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py +++ /dev/null @@ -1,62 +0,0 @@ -import pycurl -import threading -from io import BytesIO -import sys - -client_ip = sys.argv[1] -server_ip = sys.argv[2] - -mutex = threading.Lock() -success_counter = 0 -number_of_threads = 100 -number_of_iterations = 50 - - -def perform_request(): - buffer = BytesIO() - crl = pycurl.Curl() - crl.setopt(pycurl.INTERFACE, client_ip) - crl.setopt(crl.WRITEDATA, buffer) - crl.setopt(crl.URL, f"http://{server_ip}:8123/?query=select+1&user=test_dns") - - crl.perform() - - # End curl session - crl.close() - - str_response = buffer.getvalue().decode("iso-8859-1") - expected_response = "1\n" - - mutex.acquire() - - global success_counter - - if str_response == expected_response: - success_counter += 1 - - mutex.release() - - -def perform_multiple_requests(n): - for request_number in range(n): - perform_request() - - -threads = [] - - -for i in range(number_of_threads): - thread = threading.Thread( - target=perform_multiple_requests, args=(number_of_iterations,) - ) - thread.start() - threads.append(thread) - -for thread in threads: - thread.join() - - -if success_counter == number_of_threads * number_of_iterations: - exit(0) - -exit(1) diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py deleted file mode 100644 index d73e8813e79..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py +++ /dev/null @@ -1,88 +0,0 @@ -import pytest -import socket -from helpers.cluster import ClickHouseCluster, get_docker_compose_path, run_and_check -from time import sleep -import os - -DOCKER_COMPOSE_PATH = get_docker_compose_path() -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - -cluster = ClickHouseCluster(__file__) - -ch_server = cluster.add_instance( - "clickhouse-server", - with_coredns=True, - main_configs=["configs/config.xml", "configs/listen_host.xml"], - user_configs=["configs/host_regexp.xml"], -) - -client = cluster.add_instance( - "clickhouse-client", -) - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -def check_ptr_record(ip, hostname): - try: - host, aliaslist, ipaddrlist = socket.gethostbyaddr(ip) - if hostname.lower() == host.lower(): - return True - except socket.herror: - pass - return False - - -def setup_dns_server(ip): - domains_string = "test3.example.com test2.example.com test1.example.com" - example_file_path = f'{ch_server.env_variables["COREDNS_CONFIG_DIR"]}/example.com' - run_and_check(f"echo '{ip} {domains_string}' > {example_file_path}", shell=True) - - # DNS server takes time to reload the configuration. - for try_num in range(10): - if all(check_ptr_record(ip, host) for host in domains_string.split()): - break - sleep(1) - - -def setup_ch_server(dns_server_ip): - ch_server.exec_in_container( - (["bash", "-c", f"echo 'nameserver {dns_server_ip}' > /etc/resolv.conf"]) - ) - ch_server.exec_in_container( - (["bash", "-c", "echo 'options ndots:0' >> /etc/resolv.conf"]) - ) - ch_server.query("SYSTEM DROP DNS CACHE") - - -def build_endpoint_v4(ip): - return f"'http://{ip}:8123/?query=SELECT+1&user=test_dns'" - - -def build_endpoint_v6(ip): - return build_endpoint_v4(f"[{ip}]") - - -def test_host_regexp_multiple_ptr_v4(started_cluster): - server_ip = cluster.get_instance_ip("clickhouse-server") - client_ip = cluster.get_instance_ip("clickhouse-client") - dns_server_ip = cluster.get_instance_ip(cluster.coredns_host) - - setup_dns_server(client_ip) - setup_ch_server(dns_server_ip) - - current_dir = os.path.dirname(__file__) - client.copy_file_to_container( - os.path.join(current_dir, "scripts", "stress_test.py"), "stress_test.py" - ) - - client.exec_in_container(["python3", f"stress_test.py", client_ip, server_ip]) diff --git a/tests/integration/test_interserver_dns_retires/test.py b/tests/integration/test_interserver_dns_retires/test.py index f0c581e6450..7c81f278737 100644 --- a/tests/integration/test_interserver_dns_retires/test.py +++ b/tests/integration/test_interserver_dns_retires/test.py @@ -2,6 +2,7 @@ This test makes sure interserver cluster queries handle invalid DNS records for replicas. """ + from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster, ClickHouseInstance diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index 44b2b50673a..83503122729 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -293,6 +293,16 @@ def test_cmd_conf(started_cluster): assert result["configuration_change_tries_count"] == "20" assert result["async_replication"] == "true" + + assert result["latest_logs_cache_size_threshold"] == "1073741824" + assert result["commit_logs_cache_size_threshold"] == "524288000" + + assert result["disk_move_retries_wait_ms"] == "1000" + assert result["disk_move_retries_during_init"] == "100" + + assert result["log_slow_total_threshold_ms"] == "5000" + assert result["log_slow_cpu_threshold_ms"] == "100" + assert result["log_slow_connection_operation_threshold_ms"] == "1000" finally: close_keeper_socket(client) diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index b8e67551d79..c2469d6c524 100644 --- a/tests/integration/test_lost_part/test.py +++ b/tests/integration/test_lost_part/test.py @@ -266,7 +266,7 @@ def test_lost_last_part(start_cluster): "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} ) - partition_id = node1.query("select partitionId('x')").strip() + partition_id = node1.query("select partitionID('x')").strip() remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0") # other way to detect broken parts diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index fe3acd9a0cf..0eda165b1d2 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -35,7 +35,7 @@ def get_latest_mem_limit(): ).strip() ) return mem_limit - except Exception as e: + except Exception: time.sleep(1) raise Exception("Cannot get memory limit") @@ -51,3 +51,29 @@ def test_observe_memory_limit(started_cluster): if new_max_mem > original_max_mem: return raise Exception("the memory limit does not increase as expected") + + +def test_memory_usage_doesnt_include_page_cache_size(started_cluster): + try: + # populate page cache with 4GB of data; it might be killed by OOM killer but it is fine + node1.exec_in_container( + ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=4K"] + ) + except Exception: + pass + + observer_refresh_period = int( + node1.query( + "select value from system.server_settings where name = 'cgroups_memory_usage_observer_wait_time'" + ).strip() + ) + time.sleep(observer_refresh_period + 1) + + max_mem_usage_from_cgroup = node1.query( + """ + SELECT max(toUInt64(replaceRegexpAll(message, 'Read current memory usage (\\d+) bytes.*', '\\1'))) AS max_mem + FROM system.text_log + WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory usage%bytes%' + """ + ).strip() + assert int(max_mem_usage_from_cgroup) < 2 * 2**30 diff --git a/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml b/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml index 2d7946d1587..43d80ee6f69 100644 --- a/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml +++ b/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml @@ -9,4 +9,21 @@ value1 + + + + + true + + node_with_keeper + 9000 + + + node_with_keeper_2 + 9000 + + + true + + diff --git a/tests/integration/test_named_collections/configs/users.d/users.xml b/tests/integration/test_named_collections/configs/users.d/users.xml index 15da914f666..7d4f0543ff1 100644 --- a/tests/integration/test_named_collections/configs/users.d/users.xml +++ b/tests/integration/test_named_collections/configs/users.d/users.xml @@ -1,4 +1,9 @@ + + + 0 + + diff --git a/tests/integration/test_named_collections/test.py b/tests/integration/test_named_collections/test.py index dbc502236c0..32846c79d23 100644 --- a/tests/integration/test_named_collections/test.py +++ b/tests/integration/test_named_collections/test.py @@ -3,6 +3,8 @@ import pytest import os import time from helpers.cluster import ClickHouseCluster +from contextlib import nullcontext as does_not_raise +from helpers.client import QueryRuntimeException SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) NAMED_COLLECTIONS_CONFIG = os.path.join( @@ -761,3 +763,32 @@ def test_keeper_storage(cluster): check_dropped(node1) check_dropped(node2) + + +@pytest.mark.parametrize( + "ignore, expected_raise", + [(True, does_not_raise()), (False, pytest.raises(QueryRuntimeException))], +) +def test_keeper_storage_remove_on_cluster(cluster, ignore, expected_raise): + node = cluster.instances["node_with_keeper"] + + replace_in_users_config( + node, + "ignore_on_cluster_for_replicated_named_collections_queries>.", + f"ignore_on_cluster_for_replicated_named_collections_queries>{int(ignore)}", + ) + node.query("SYSTEM RELOAD CONFIG") + + with expected_raise: + node.query( + "DROP NAMED COLLECTION IF EXISTS test_nc ON CLUSTER `replicated_nc_nodes_cluster`" + ) + node.query( + f"CREATE NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster` AS key1=1, key2=2 OVERRIDABLE" + ) + node.query( + f"ALTER NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster` SET key2=3" + ) + node.query( + f"DROP NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster`" + ) diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 06cbe70f7c6..0d0d7a0afb1 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -40,6 +40,16 @@ create_table_sql_template = """ PRIMARY KEY (`id`)) ENGINE=InnoDB; """ +create_table_sql_nullable_template = """ + CREATE TABLE `clickhouse`.`{}` ( + `id` integer not null, + `col1` integer, + `col2` decimal(15,10), + `col3` varchar(32), + `col4` datetime + ) + """ + def skip_test_msan(instance): if instance.is_built_with_memory_sanitizer(): @@ -77,6 +87,11 @@ def create_mysql_db(conn, name): cursor.execute("CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(name)) +def create_mysql_nullable_table(conn, table_name): + with conn.cursor() as cursor: + cursor.execute(create_table_sql_nullable_template.format(table_name)) + + def create_mysql_table(conn, table_name): with conn.cursor() as cursor: cursor.execute(create_table_sql_template.format(table_name)) @@ -192,6 +207,46 @@ def started_cluster(): cluster.shutdown() +def test_mysql_odbc_select_nullable(started_cluster): + skip_test_msan(node1) + mysql_setup = node1.odbc_drivers["MySQL"] + + table_name = "test_insert_nullable_select" + conn = get_mysql_conn() + create_mysql_nullable_table(conn, table_name) + with conn.cursor() as cursor: + cursor.execute( + "INSERT INTO clickhouse.{} VALUES(1, 1, 1.23456, 'data1', '2010-01-01 00:00:00');".format( + table_name + ) + ) + cursor.execute( + "INSERT INTO clickhouse.{} VALUES(2, NULL, NULL, NULL, NULL);".format( + table_name + ) + ) + conn.commit() + + node1.query( + """ + CREATE TABLE {}(id UInt32, col1 Nullable(UInt32), col2 Nullable(Decimal(15, 10)), col3 Nullable(String), col4 Nullable(DateTime)) ENGINE = ODBC('DSN={}', 'clickhouse', '{}'); + """.format( + table_name, mysql_setup["DSN"], table_name + ) + ) + + assert ( + node1.query( + "SELECT id, col1, col2, col3, col4 from {} order by id asc".format( + table_name + ) + ) + == "1\t1\t1.23456\tdata1\t2010-01-01 00:00:00\n2\t\\N\t\\N\t\\N\t\\N\n" + ) + drop_mysql_table(conn, table_name) + conn.close() + + def test_mysql_simple_select_works(started_cluster): skip_test_msan(node1) diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index 07a9e2badff..375fe58d741 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -5,7 +5,10 @@ cluster = ClickHouseCluster(__file__) nodes = [ cluster.add_instance( - f"n{i}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True + f"n{i}", + main_configs=["configs/remote_servers.xml"], + with_zookeeper=True, + macros={"replica": f"r{i}"}, ) for i in range(1, 5) ] @@ -20,34 +23,17 @@ def start_cluster(): cluster.shutdown() -def create_tables(cluster): - n1 = nodes[0] - n1.query("DROP TABLE IF EXISTS dist_table") - n1.query(f"DROP TABLE IF EXISTS test_table ON CLUSTER {cluster}") - - n1.query( - f"CREATE TABLE test_table ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))" - ) - n1.query( - f""" - CREATE TABLE dist_table AS test_table - Engine=Distributed( - {cluster}, - currentDatabase(), - test_table, - rand() - ) - """ +def insert_data(table_name, row_num, all_nodes=False): + query = ( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers({row_num})" ) - -def insert_data(cluster, row_num): - create_tables(cluster) - n1 = nodes[0] - n1.query( - f"INSERT INTO dist_table SELECT number % 4, number FROM numbers({row_num})" - ) - n1.query("SYSTEM FLUSH DISTRIBUTED dist_table") + if all_nodes: + for n in nodes: + n.query(query) + else: + n1 = nodes[0] + n1.query(query) @pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) @@ -56,12 +42,36 @@ def insert_data(cluster, row_num): "cluster", ["test_multiple_shards_multiple_replicas", "test_single_shard_multiple_replicas"], ) -def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter_type): +def test_parallel_replicas_custom_key_distributed( + start_cluster, cluster, custom_key, filter_type +): for node in nodes: node.rotate_logs() row_num = 1000 - insert_data(cluster, row_num) + + n1 = nodes[0] + n1.query(f"DROP TABLE IF EXISTS dist_table ON CLUSTER {cluster} SYNC") + n1.query(f"DROP TABLE IF EXISTS test_table_for_dist ON CLUSTER {cluster} SYNC") + n1.query( + f"CREATE TABLE test_table_for_dist ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))" + ) + + n1.query( + f""" + CREATE TABLE dist_table AS test_table_for_dist + Engine=Distributed( + {cluster}, + currentDatabase(), + test_table_for_dist, + rand() + ) + """ + ) + + insert_data("dist_table", row_num) + + n1.query("SYSTEM FLUSH DISTRIBUTED dist_table") expected_result = "" for i in range(4): @@ -72,10 +82,10 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter n1.query( "SELECT key, count() FROM dist_table GROUP BY key ORDER BY key", settings={ - "prefer_localhost_replica": 0, "max_parallel_replicas": 4, "parallel_replicas_custom_key": custom_key, "parallel_replicas_custom_key_filter_type": filter_type, + "prefer_localhost_replica": 0, }, ) == expected_result @@ -87,3 +97,87 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter node.contains_in_log("Processing query on a replica using custom_key") for node in nodes ) + + +@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) +@pytest.mark.parametrize("filter_type", ["default", "range"]) +@pytest.mark.parametrize( + "cluster", + ["test_single_shard_multiple_replicas"], +) +def test_parallel_replicas_custom_key_mergetree( + start_cluster, cluster, custom_key, filter_type +): + for node in nodes: + node.rotate_logs() + + row_num = 1000 + n1 = nodes[0] + n1.query(f"DROP TABLE IF EXISTS test_table_for_mt ON CLUSTER {cluster} SYNC") + n1.query( + f"CREATE TABLE test_table_for_mt ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))" + ) + + insert_data("test_table_for_mt", row_num, all_nodes=True) + + expected_result = "" + for i in range(4): + expected_result += f"{i}\t250\n" + + n1 = nodes[0] + assert ( + n1.query( + "SELECT key, count() FROM test_table_for_mt GROUP BY key ORDER BY key", + settings={ + "max_parallel_replicas": 4, + "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key_filter_type": filter_type, + "parallel_replicas_for_non_replicated_merge_tree": 1, + "cluster_for_parallel_replicas": cluster, + }, + ) + == expected_result + ) + + +@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) +@pytest.mark.parametrize("filter_type", ["default", "range"]) +@pytest.mark.parametrize( + "cluster", + ["test_single_shard_multiple_replicas"], +) +def test_parallel_replicas_custom_key_replicatedmergetree( + start_cluster, cluster, custom_key, filter_type +): + for node in nodes: + node.rotate_logs() + + row_num = 1000 + n1 = nodes[0] + n1.query(f"DROP TABLE IF EXISTS test_table_for_rmt ON CLUSTER {cluster} SYNC") + n1.query( + f"CREATE TABLE test_table_for_rmt ON CLUSTER {cluster} (key UInt32, value String) Engine=ReplicatedMergeTree('/clickhouse/tables', '{{replica}}') ORDER BY (key, sipHash64(value))" + ) + + insert_data("test_table_for_rmt", row_num, all_nodes=False) + + for node in nodes: + node.query("SYSTEM SYNC REPLICA test_table_for_rmt LIGHTWEIGHT") + + expected_result = "" + for i in range(4): + expected_result += f"{i}\t250\n" + + n1 = nodes[0] + assert ( + n1.query( + "SELECT key, count() FROM test_table_for_rmt GROUP BY key ORDER BY key", + settings={ + "max_parallel_replicas": 4, + "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key_filter_type": filter_type, + "cluster_for_parallel_replicas": cluster, + }, + ) + == expected_result + ) diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/test.py b/tests/integration/test_parallel_replicas_custom_key_failover/test.py index 3ba3ce092c3..f24a24f3238 100644 --- a/tests/integration/test_parallel_replicas_custom_key_failover/test.py +++ b/tests/integration/test_parallel_replicas_custom_key_failover/test.py @@ -76,11 +76,11 @@ def test_parallel_replicas_custom_key_failover( f"SELECT key, count() FROM cluster('{cluster_name}', currentDatabase(), test_table) GROUP BY key ORDER BY key", settings={ "log_comment": log_comment, - "prefer_localhost_replica": prefer_localhost_replica, "max_parallel_replicas": 4, "parallel_replicas_custom_key": custom_key, "parallel_replicas_custom_key_filter_type": filter_type, "use_hedged_requests": use_hedged_requests, + "prefer_localhost_replica": prefer_localhost_replica, # avoid considering replica delay on connection choice # otherwise connection can be not distributed evenly among available nodes # and so custom key secondary queries (we check it bellow) @@ -100,20 +100,19 @@ def test_parallel_replicas_custom_key_failover( assert query_id != "" query_id = query_id[:-1] - if prefer_localhost_replica == 0: + assert ( + node1.query( + f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" + ) + == "subqueries\t4\n" + ) + + # With enabled hedged requests, we can't guarantee exact query distribution among nodes + # In case of a replica being slow in terms of responsiveness, hedged connection can change initial replicas choice + if use_hedged_requests == 0: assert ( node1.query( - f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" + f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" ) - == "subqueries\t4\n" + == "n1\t3\nn3\t2\n" ) - - # With enabled hedged requests, we can't guarantee exact query distribution among nodes - # In case of a replica being slow in terms of responsiveness, hedged connection can change initial replicas choice - if use_hedged_requests == 0: - assert ( - node1.query( - f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" - ) - == "n1\t3\nn3\t2\n" - ) diff --git a/tests/integration/test_profile_max_sessions_for_user/test.py b/tests/integration/test_profile_max_sessions_for_user/test.py index 133991fed7a..a2fa77e8dc9 100755 --- a/tests/integration/test_profile_max_sessions_for_user/test.py +++ b/tests/integration/test_profile_max_sessions_for_user/test.py @@ -7,7 +7,7 @@ import pytest import sys import threading -from helpers.cluster import ClickHouseCluster, run_and_check +from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_logs_contain_with_retry from helpers.uclient import client, prompt @@ -51,7 +51,7 @@ instance = cluster.add_instance( def get_query(name, id): - return f"SElECT '{name}', {id}, number from system.numbers" + return f"SELECT '{name}', {id}, COUNT(*) from system.numbers" def grpc_get_url(): @@ -90,7 +90,7 @@ def threaded_run_test(sessions): if len(sessions) > MAX_SESSIONS_FOR_USER: # High retry amount to avoid flakiness in ASAN (+Analyzer) tests assert_logs_contain_with_retry( - instance, "overflown session count", retry_count=60 + instance, "overflown session count", retry_count=120 ) instance.query(f"KILL QUERY WHERE user='{TEST_USER}' SYNC") diff --git a/tests/integration/test_startup_scripts/__init__.py b/tests/integration/test_startup_scripts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_startup_scripts/configs/config.d/query_log.xml b/tests/integration/test_startup_scripts/configs/config.d/query_log.xml new file mode 100644 index 00000000000..24d66fc674e --- /dev/null +++ b/tests/integration/test_startup_scripts/configs/config.d/query_log.xml @@ -0,0 +1,8 @@ + + + system + query_log
+ toYYYYMM(event_date) + 1000 +
+
diff --git a/tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml b/tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml new file mode 100644 index 00000000000..e8a711a926a --- /dev/null +++ b/tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml @@ -0,0 +1,17 @@ + + + + CREATE ROLE OR REPLACE testrole + + + GRANT CREATE USER, ALTER USER, DROP USER, SHOW USERS, SHOW CREATE USER ON *.* TO 'testrole' WITH GRANT OPTION; + + + CREATE TABLE TestTable (id UInt64) ENGINE=TinyLog + SELECT 1; + + + SELECT * FROM system.query_log LIMIT 1 + + + diff --git a/tests/integration/test_startup_scripts/configs/users.xml b/tests/integration/test_startup_scripts/configs/users.xml new file mode 100644 index 00000000000..f9917b034b2 --- /dev/null +++ b/tests/integration/test_startup_scripts/configs/users.xml @@ -0,0 +1,41 @@ + + + + + + + + 1 + + + + + + + + + + ::/0 + + + default + + default + + + + + + + + 3600 + + 0 + 0 + 0 + 0 + 0 + + + + diff --git a/tests/integration/test_startup_scripts/test.py b/tests/integration/test_startup_scripts/test.py new file mode 100644 index 00000000000..43a871a6fc5 --- /dev/null +++ b/tests/integration/test_startup_scripts/test.py @@ -0,0 +1,21 @@ +from helpers.cluster import ClickHouseCluster + + +def test_startup_scripts(): + cluster = ClickHouseCluster(__file__) + + node = cluster.add_instance( + "node", + main_configs=[ + "configs/config.d/query_log.xml", + "configs/config.d/startup_scripts.xml", + ], + with_zookeeper=False, + ) + + try: + cluster.start() + assert node.query("SHOW TABLES") == "TestTable\n" + + finally: + cluster.shutdown() diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 20b004a7605..6fbe7634642 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -809,7 +809,7 @@ def test_read_subcolumn_time(cluster): def test_read_from_not_existing_container(cluster): node = cluster.instances["node"] query = ( - f"select * from azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont_not_exists', 'test_table.csv', " + f"select * from azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont-not-exists', 'test_table.csv', " f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')" ) expected_err_msg = "container does not exist" diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 25f0b58e0f5..d3dd7cfe52a 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -153,7 +153,7 @@ def test_single_log_file(started_cluster): bucket = started_cluster.minio_bucket TABLE_NAME = "test_single_log_file" - inserted_data = "SELECT number, toString(number + 1) FROM numbers(100)" + inserted_data = "SELECT number as a, toString(number + 1) as b FROM numbers(100)" parquet_data_path = create_initial_data_file( started_cluster, instance, inserted_data, TABLE_NAME ) @@ -511,3 +511,201 @@ def test_restart_broken_table_function(started_cluster): upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + + +def test_partition_columns(started_cluster): + instance = started_cluster.instances["node1"] + spark = started_cluster.spark_session + minio_client = started_cluster.minio_client + bucket = started_cluster.minio_bucket + TABLE_NAME = "test_partition_columns" + result_file = f"{TABLE_NAME}" + partition_columns = ["b", "c", "d", "e"] + + delta_table = ( + DeltaTable.create(spark) + .tableName(TABLE_NAME) + .location(f"/{result_file}") + .addColumn("a", "INT") + .addColumn("b", "STRING") + .addColumn("c", "DATE") + .addColumn("d", "INT") + .addColumn("e", "BOOLEAN") + .partitionedBy(partition_columns) + .execute() + ) + num_rows = 9 + + schema = StructType( + [ + StructField("a", IntegerType()), + StructField("b", StringType()), + StructField("c", DateType()), + StructField("d", IntegerType()), + StructField("e", BooleanType()), + ] + ) + + for i in range(1, num_rows + 1): + data = [ + ( + i, + "test" + str(i), + datetime.strptime(f"2000-01-0{i}", "%Y-%m-%d"), + i, + False, + ) + ] + df = spark.createDataFrame(data=data, schema=schema) + df.printSchema() + df.write.mode("append").format("delta").partitionBy(partition_columns).save( + f"/{TABLE_NAME}" + ) + + minio_client = started_cluster.minio_client + bucket = started_cluster.minio_bucket + + files = upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + assert len(files) > 0 + print(f"Uploaded files: {files}") + + result = instance.query( + f"describe table deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')" + ).strip() + + assert ( + result + == "a\tNullable(Int32)\t\t\t\t\t\nb\tNullable(String)\t\t\t\t\t\nc\tNullable(Date32)\t\t\t\t\t\nd\tNullable(Int32)\t\t\t\t\t\ne\tNullable(Bool)" + ) + + result = int( + instance.query( + f"""SELECT count() + FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') + """ + ) + ) + assert result == num_rows + result = int( + instance.query( + f"""SELECT count() + FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') + WHERE c == toDateTime('2000/01/05') + """ + ) + ) + assert result == 1 + + instance.query( + f""" + DROP TABLE IF EXISTS {TABLE_NAME}; + CREATE TABLE {TABLE_NAME} (a Nullable(Int32), b Nullable(String), c Nullable(Date32), d Nullable(Int32), e Nullable(Bool)) + ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" + ) + assert ( + """1 test1 2000-01-01 1 false +2 test2 2000-01-02 2 false +3 test3 2000-01-03 3 false +4 test4 2000-01-04 4 false +5 test5 2000-01-05 5 false +6 test6 2000-01-06 6 false +7 test7 2000-01-07 7 false +8 test8 2000-01-08 8 false +9 test9 2000-01-09 9 false""" + == instance.query(f"SELECT * FROM {TABLE_NAME} ORDER BY b").strip() + ) + + assert ( + int( + instance.query( + f"SELECT count() FROM {TABLE_NAME} WHERE c == toDateTime('2000/01/05')" + ) + ) + == 1 + ) + + # Subset of columns should work. + instance.query( + f""" + DROP TABLE IF EXISTS {TABLE_NAME}; + CREATE TABLE {TABLE_NAME} (b Nullable(String), c Nullable(Date32), d Nullable(Int32)) + ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" + ) + assert ( + """test1 2000-01-01 1 +test2 2000-01-02 2 +test3 2000-01-03 3 +test4 2000-01-04 4 +test5 2000-01-05 5 +test6 2000-01-06 6 +test7 2000-01-07 7 +test8 2000-01-08 8 +test9 2000-01-09 9""" + == instance.query(f"SELECT * FROM {TABLE_NAME} ORDER BY b").strip() + ) + + for i in range(num_rows + 1, 2 * num_rows + 1): + data = [ + ( + i, + "test" + str(i), + datetime.strptime(f"2000-01-{i}", "%Y-%m-%d"), + i, + False, + ) + ] + df = spark.createDataFrame(data=data, schema=schema) + df.printSchema() + df.write.mode("append").format("delta").partitionBy(partition_columns).save( + f"/{TABLE_NAME}" + ) + + files = upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + ok = False + for file in files: + if file.endswith("last_checkpoint"): + ok = True + assert ok + + result = int( + instance.query( + f"""SELECT count() + FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') + """ + ) + ) + assert result == num_rows * 2 + + assert ( + """1 test1 2000-01-01 1 false +2 test2 2000-01-02 2 false +3 test3 2000-01-03 3 false +4 test4 2000-01-04 4 false +5 test5 2000-01-05 5 false +6 test6 2000-01-06 6 false +7 test7 2000-01-07 7 false +8 test8 2000-01-08 8 false +9 test9 2000-01-09 9 false +10 test10 2000-01-10 10 false +11 test11 2000-01-11 11 false +12 test12 2000-01-12 12 false +13 test13 2000-01-13 13 false +14 test14 2000-01-14 14 false +15 test15 2000-01-15 15 false +16 test16 2000-01-16 16 false +17 test17 2000-01-17 17 false +18 test18 2000-01-18 18 false""" + == instance.query( + f""" +SELECT * FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') ORDER BY c + """ + ).strip() + ) + assert ( + int( + instance.query( + f"SELECT count() FROM {TABLE_NAME} WHERE c == toDateTime('2000/01/15')" + ) + ) + == 1 + ) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 3240039ee81..f885a3507ac 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -78,13 +78,13 @@ def wait_rabbitmq_to_start(rabbitmq_docker_id, cookie, timeout=180): def kill_rabbitmq(rabbitmq_id): p = subprocess.Popen(("docker", "stop", rabbitmq_id), stdout=subprocess.PIPE) - p.communicate() + p.wait(timeout=60) return p.returncode == 0 def revive_rabbitmq(rabbitmq_id, cookie): p = subprocess.Popen(("docker", "start", rabbitmq_id), stdout=subprocess.PIPE) - p.communicate() + p.wait(timeout=60) wait_rabbitmq_to_start(rabbitmq_id, cookie) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 9a0cb352088..40cbf4b44a6 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -197,7 +197,9 @@ def test_partition_by_string_column(started_cluster): started_cluster, bucket, "test_foo/bar.csv" ) assert '3,"йцук"\n' == get_s3_file_content(started_cluster, bucket, "test_йцук.csv") - assert '78,"你好"\n' == get_s3_file_content(started_cluster, bucket, "test_你好.csv") + assert '78,"你好"\n' == get_s3_file_content( + started_cluster, bucket, "test_你好.csv" + ) def test_partition_by_const_column(started_cluster): diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index b93e560d5b9..bf3c28c5429 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -1780,6 +1780,7 @@ def test_commit_on_limit(started_cluster): if "test_999999.csv" in get_processed_files(): break time.sleep(1) + assert "test_999999.csv" in get_processed_files() assert 1 == int( diff --git a/tests/integration/test_tcp_handler_http_responses/test_case.py b/tests/integration/test_tcp_handler_http_responses/test_case.py index 2fc53674ca4..98f4b74223e 100644 --- a/tests/integration/test_tcp_handler_http_responses/test_case.py +++ b/tests/integration/test_tcp_handler_http_responses/test_case.py @@ -1,4 +1,5 @@ """Test HTTP responses given by the TCP Handler.""" + from pathlib import Path import pytest from helpers.cluster import ClickHouseCluster diff --git a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py index 62581996f3b..b20ab48795b 100644 --- a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py +++ b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py @@ -1,4 +1,5 @@ """Test Interserver responses on configured IP.""" + from pathlib import Path import pytest from helpers.cluster import ClickHouseCluster diff --git a/tests/performance/replaceRegexp_fallback.xml b/tests/performance/replaceRegexp_fallback.xml new file mode 100644 index 00000000000..509257efeb5 --- /dev/null +++ b/tests/performance/replaceRegexp_fallback.xml @@ -0,0 +1,12 @@ + + + + + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), ' ', '\n') AS w FROM numbers_mt(5000000) FORMAT Null + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), ' ', '\n') AS w FROM numbers_mt(5000000) FORMAT Null + + + + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), '\s+', '\\0\n') AS w FROM numbers_mt(500000) FORMAT Null + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), '\s+', '\\0\n') AS w FROM numbers_mt(500000) FORMAT Null + diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 0ca7df8ecd3..07c42d6d039 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-parallel, no-fasttest, no-ubsan, no-batch -- no-parallel because we want to run this test when most of the other tests already passed -- If this test fails, see the "Top patterns of log messages" diagnostics in the end of run.log diff --git a/tests/queries/0_stateless/00027_argMinMax.sql b/tests/queries/0_stateless/00027_argMinMax.sql index dbf7c9176d2..57f815add27 100644 --- a/tests/queries/0_stateless/00027_argMinMax.sql +++ b/tests/queries/0_stateless/00027_argMinMax.sql @@ -13,4 +13,4 @@ FROM SELECT arrayJoin([[10, 4, 3], [7, 5, 6], [8, 8, 2]]) AS num, arrayJoin([[1, 2, 4]]) AS id -) +); diff --git a/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh b/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh index a42fd58190a..d57efaa1f0e 100755 --- a/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh +++ b/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, shard, no-parallel +# Tags: long, shard CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/00307_format_xml.sql b/tests/queries/0_stateless/00307_format_xml.sql index 7fdca83b69f..29c733bb186 100644 --- a/tests/queries/0_stateless/00307_format_xml.sql +++ b/tests/queries/0_stateless/00307_format_xml.sql @@ -1,2 +1,2 @@ SET output_format_write_statistics = 0; -SELECT 'Hello & world' AS s, 'Hello\n', toDateTime('2001-02-03 04:05:06') AS time, arrayMap(x -> toString(x), range(10)) AS arr, (s, time) AS tpl SETTINGS extremes = 1 FORMAT XML; +SELECT 'Hello & world' AS s, 'Hello\n', toDateTime('2001-02-03 04:05:06') AS time, arrayMap(x -> toString(x), range(10)) AS arr, (s, time) AS tpl SETTINGS extremes = 1, enable_named_columns_in_function_tuple = 0 FORMAT XML; diff --git a/tests/queries/0_stateless/00309_formats.sql b/tests/queries/0_stateless/00309_formats.sql index 87a1ea454d0..b0939c00a10 100644 --- a/tests/queries/0_stateless/00309_formats.sql +++ b/tests/queries/0_stateless/00309_formats.sql @@ -1,4 +1,6 @@ SET output_format_write_statistics = 0; +SET enable_named_columns_in_function_tuple = 0; + SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT RowBinary; SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT RowBinaryWithNamesAndTypes; SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT TabSeparatedWithNamesAndTypes; diff --git a/tests/queries/0_stateless/00411_long_accurate_number_comparison.python b/tests/queries/0_stateless/00411_long_accurate_number_comparison.python index 183a2637d36..045de9ee7ee 100644 --- a/tests/queries/0_stateless/00411_long_accurate_number_comparison.python +++ b/tests/queries/0_stateless/00411_long_accurate_number_comparison.python @@ -50,7 +50,7 @@ TYPES = { "UInt32": {"bits": 32, "sign": False, "float": False}, "Int32": {"bits": 32, "sign": True, "float": False}, "UInt64": {"bits": 64, "sign": False, "float": False}, - "Int64": {"bits": 64, "sign": True, "float": False} + "Int64": {"bits": 64, "sign": True, "float": False}, # "Float32" : { "bits" : 32, "sign" : True, "float" : True }, # "Float64" : { "bits" : 64, "sign" : True, "float" : True } } diff --git a/tests/queries/0_stateless/00429_long_http_bufferization.sh b/tests/queries/0_stateless/00429_long_http_bufferization.sh index 98dd300e6ab..83a6a4e8043 100755 --- a/tests/queries/0_stateless/00429_long_http_bufferization.sh +++ b/tests/queries/0_stateless/00429_long_http_bufferization.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long set -e diff --git a/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql b/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql index 7c939d060ea..14929045356 100644 --- a/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql +++ b/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql @@ -1,5 +1,8 @@ set allow_deprecated_syntax_for_merge_tree=1; +set max_threads = 1; +set max_insert_threads = 1; + drop table if exists test_ins_arr; create table test_ins_arr (date Date, val Array(UInt64)) engine = MergeTree(date, (date), 8192); insert into test_ins_arr select toDate('2017-10-02'), [number, 42] from system.numbers limit 10000; diff --git a/tests/queries/0_stateless/00600_replace_running_query.sh b/tests/queries/0_stateless/00600_replace_running_query.sh index 6a682210489..7a71d17f19b 100755 --- a/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/tests/queries/0_stateless/00600_replace_running_query.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none @@ -7,9 +6,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -q "drop user if exists u_00600" -${CLICKHOUSE_CLIENT} -q "create user u_00600 settings max_execution_time=60, readonly=1" -${CLICKHOUSE_CLIENT} -q "grant select on system.numbers to u_00600" +TEST_PREFIX=$RANDOM +${CLICKHOUSE_CLIENT} -q "drop user if exists u_00600${TEST_PREFIX}" +${CLICKHOUSE_CLIENT} -q "create user u_00600${TEST_PREFIX} settings max_execution_time=60, readonly=1" +${CLICKHOUSE_CLIENT} -q "grant select on system.numbers to u_00600${TEST_PREFIX}" function wait_for_query_to_start() { @@ -26,7 +26,7 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d # Wait for it to be replaced wait -${CLICKHOUSE_CLIENT_BINARY} --user=u_00600 --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & +${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & wait_for_query_to_start '42' # Trying to run another query with the same query_id @@ -43,4 +43,4 @@ wait_for_query_to_start '42' ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --replace_running_query_max_wait_ms=500 --query='SELECT 43' 2>&1 | grep -F "can't be stopped" > /dev/null wait ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --query='SELECT 44' -${CLICKHOUSE_CLIENT} -q "drop user u_00600" +${CLICKHOUSE_CLIENT} -q "drop user u_00600${TEST_PREFIX}" diff --git a/tests/queries/0_stateless/00623_truncate_all_tables.sql b/tests/queries/0_stateless/00623_truncate_all_tables.sql index 2d5e9d48f59..2626f7ed285 100644 --- a/tests/queries/0_stateless/00623_truncate_all_tables.sql +++ b/tests/queries/0_stateless/00623_truncate_all_tables.sql @@ -1,50 +1,43 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS truncate_test; - -CREATE DATABASE IF NOT EXISTS truncate_test; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_set(id UInt64) ENGINE = Set; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_log(id UInt64) ENGINE = Log; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_memory(id UInt64) ENGINE = Memory; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_tiny_log(id UInt64) ENGINE = TinyLog; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_stripe_log(id UInt64) ENGINE = StripeLog; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_merge_tree(p Date, k UInt64) ENGINE = MergeTree ORDER BY p; +CREATE TABLE IF NOT EXISTS truncate_test_set(id UInt64) ENGINE = Set; +CREATE TABLE IF NOT EXISTS truncate_test_log(id UInt64) ENGINE = Log; +CREATE TABLE IF NOT EXISTS truncate_test_memory(id UInt64) ENGINE = Memory; +CREATE TABLE IF NOT EXISTS truncate_test_tiny_log(id UInt64) ENGINE = TinyLog; +CREATE TABLE IF NOT EXISTS truncate_test_stripe_log(id UInt64) ENGINE = StripeLog; +CREATE TABLE IF NOT EXISTS truncate_test_merge_tree(p Date, k UInt64) ENGINE = MergeTree ORDER BY p; SELECT '======Before Truncate======'; -INSERT INTO truncate_test.truncate_test_set VALUES(0); -INSERT INTO truncate_test.truncate_test_log VALUES(1); -INSERT INTO truncate_test.truncate_test_memory VALUES(1); -INSERT INTO truncate_test.truncate_test_tiny_log VALUES(1); -INSERT INTO truncate_test.truncate_test_stripe_log VALUES(1); -INSERT INTO truncate_test.truncate_test_merge_tree VALUES('2000-01-01', 1); -SELECT * FROM system.numbers WHERE number NOT IN truncate_test.truncate_test_set LIMIT 1; -SELECT * FROM truncate_test.truncate_test_log; -SELECT * FROM truncate_test.truncate_test_memory; -SELECT * FROM truncate_test.truncate_test_tiny_log; -SELECT * FROM truncate_test.truncate_test_stripe_log; -SELECT * FROM truncate_test.truncate_test_merge_tree; +INSERT INTO truncate_test_set VALUES(0); +INSERT INTO truncate_test_log VALUES(1); +INSERT INTO truncate_test_memory VALUES(1); +INSERT INTO truncate_test_tiny_log VALUES(1); +INSERT INTO truncate_test_stripe_log VALUES(1); +INSERT INTO truncate_test_merge_tree VALUES('2000-01-01', 1); +SELECT * FROM system.numbers WHERE number NOT IN truncate_test_set LIMIT 1; +SELECT * FROM truncate_test_log; +SELECT * FROM truncate_test_memory; +SELECT * FROM truncate_test_tiny_log; +SELECT * FROM truncate_test_stripe_log; +SELECT * FROM truncate_test_merge_tree; SELECT '======After Truncate And Empty======'; -TRUNCATE ALL TABLES FROM IF EXISTS truncate_test; -SELECT * FROM system.numbers WHERE number NOT IN truncate_test.truncate_test_set LIMIT 1; -SELECT * FROM truncate_test.truncate_test_log; -SELECT * FROM truncate_test.truncate_test_memory; -SELECT * FROM truncate_test.truncate_test_tiny_log; -SELECT * FROM truncate_test.truncate_test_stripe_log; -SELECT * FROM truncate_test.truncate_test_merge_tree; +TRUNCATE ALL TABLES FROM IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; +SELECT * FROM system.numbers WHERE number NOT IN truncate_test_set LIMIT 1; +SELECT * FROM truncate_test_log; +SELECT * FROM truncate_test_memory; +SELECT * FROM truncate_test_tiny_log; +SELECT * FROM truncate_test_stripe_log; +SELECT * FROM truncate_test_merge_tree; SELECT '======After Truncate And Insert Data======'; -INSERT INTO truncate_test.truncate_test_set VALUES(0); -INSERT INTO truncate_test.truncate_test_log VALUES(1); -INSERT INTO truncate_test.truncate_test_memory VALUES(1); -INSERT INTO truncate_test.truncate_test_tiny_log VALUES(1); -INSERT INTO truncate_test.truncate_test_stripe_log VALUES(1); -INSERT INTO truncate_test.truncate_test_merge_tree VALUES('2000-01-01', 1); -SELECT * FROM system.numbers WHERE number NOT IN truncate_test.truncate_test_set LIMIT 1; -SELECT * FROM truncate_test.truncate_test_log; -SELECT * FROM truncate_test.truncate_test_memory; -SELECT * FROM truncate_test.truncate_test_tiny_log; -SELECT * FROM truncate_test.truncate_test_stripe_log; -SELECT * FROM truncate_test.truncate_test_merge_tree; - -DROP DATABASE IF EXISTS truncate_test; +INSERT INTO truncate_test_set VALUES(0); +INSERT INTO truncate_test_log VALUES(1); +INSERT INTO truncate_test_memory VALUES(1); +INSERT INTO truncate_test_tiny_log VALUES(1); +INSERT INTO truncate_test_stripe_log VALUES(1); +INSERT INTO truncate_test_merge_tree VALUES('2000-01-01', 1); +SELECT * FROM system.numbers WHERE number NOT IN truncate_test_set LIMIT 1; +SELECT * FROM truncate_test_log; +SELECT * FROM truncate_test_memory; +SELECT * FROM truncate_test_tiny_log; +SELECT * FROM truncate_test_stripe_log; +SELECT * FROM truncate_test_merge_tree; diff --git a/tests/queries/0_stateless/00623_truncate_table.sql b/tests/queries/0_stateless/00623_truncate_table.sql index 4a67e49acda..e35803db1d9 100644 --- a/tests/queries/0_stateless/00623_truncate_table.sql +++ b/tests/queries/0_stateless/00623_truncate_table.sql @@ -1,6 +1,5 @@ set allow_deprecated_syntax_for_merge_tree=1; -DROP DATABASE IF EXISTS truncate_test; DROP TABLE IF EXISTS truncate_test_log; DROP TABLE IF EXISTS truncate_test_memory; DROP TABLE IF EXISTS truncate_test_tiny_log; @@ -9,7 +8,6 @@ DROP TABLE IF EXISTS truncate_test_merge_tree; DROP TABLE IF EXISTS truncate_test_materialized_view; DROP TABLE IF EXISTS truncate_test_materialized_depend; -CREATE DATABASE truncate_test; CREATE TABLE truncate_test_set(id UInt64) ENGINE = Set; CREATE TABLE truncate_test_log(id UInt64) ENGINE = Log; CREATE TABLE truncate_test_memory(id UInt64) ENGINE = Memory; @@ -75,4 +73,3 @@ DROP TABLE IF EXISTS truncate_test_stripe_log; DROP TABLE IF EXISTS truncate_test_merge_tree; DROP TABLE IF EXISTS truncate_test_materialized_view; DROP TABLE IF EXISTS truncate_test_materialized_depend; -DROP DATABASE IF EXISTS truncate_test; diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index ffbf4df4ba7..13146f2eab0 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-s3-storage +# Tags: zookeeper, no-object-storage # Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) diff --git a/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/tests/queries/0_stateless/00632_get_sample_block_cache.sql index c54ca0b084e..ae9b6bb7b2c 100644 --- a/tests/queries/0_stateless/00632_get_sample_block_cache.sql +++ b/tests/queries/0_stateless/00632_get_sample_block_cache.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage, no-asan +-- Tags: long, no-object-storage, no-asan SET joined_subquery_requires_alias = 0; diff --git a/tests/queries/0_stateless/00719_parallel_ddl_db.sh b/tests/queries/0_stateless/00719_parallel_ddl_db.sh index b7dea25c182..ac4e4eae4f2 100755 --- a/tests/queries/0_stateless/00719_parallel_ddl_db.sh +++ b/tests/queries/0_stateless/00719_parallel_ddl_db.sh @@ -1,13 +1,12 @@ #!/usr/bin/env bash -# Tags: no-parallel - set -e CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" +DB_SUFFIX=${RANDOM}${RANDOM}${RANDOM}${RANDOM} +${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl_${DB_SUFFIX}" function query() { @@ -16,8 +15,8 @@ function query() while [ $SECONDS -lt "$TIMELIMIT" ] && [ $it -lt 50 ]; do it=$((it+1)) - ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE IF NOT EXISTS parallel_ddl" - ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" + ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE IF NOT EXISTS parallel_ddl_${DB_SUFFIX}" + ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl_${DB_SUFFIX}" done } @@ -27,4 +26,4 @@ done wait -${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" +${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl_${DB_SUFFIX}" diff --git a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index af746c43da9..5a4fd901f8d 100755 --- a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage, no-tsan +# Tags: long, no-object-storage, no-tsan # no-s3 because read FileOpen metric set -e diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql index 4613576cf4e..009fc0bbb9f 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage, no-random-merge-tree-settings +-- Tags: long, no-object-storage, no-random-merge-tree-settings SET output_format_pretty_row_numbers = 0; DROP TABLE IF EXISTS check_system_tables; diff --git a/tests/queries/0_stateless/00763_lock_buffer_long.sh b/tests/queries/0_stateless/00763_lock_buffer_long.sh index 046e4efaa85..444a66767aa 100755 --- a/tests/queries/0_stateless/00763_lock_buffer_long.sh +++ b/tests/queries/0_stateless/00763_lock_buffer_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage, no-msan, no-asan, no-tsan, no-debug +# Tags: long, no-object-storage, no-msan, no-asan, no-tsan, no-debug # Some kind of stress test, it doesn't make sense to test in a non-release build set -e @@ -21,7 +21,7 @@ function thread1() function thread2() { - seq 1 1000 | sed -r -e 's/.+/SELECT count() FROM buffer_00763_2;/' | ${CLICKHOUSE_CLIENT} --multiquery --server_logs_file='/dev/null' --ignore-error 2>&1 | grep -vP '^0$|^10$|^Received exception|^Code: 60|^Code: 218|^Code: 473' | grep -v '(query: ' + seq 1 500 | sed -r -e 's/.+/SELECT count() FROM buffer_00763_2;/' | ${CLICKHOUSE_CLIENT} --multiquery --server_logs_file='/dev/null' --ignore-error 2>&1 | grep -vP '^0$|^10$|^Received exception|^Code: 60|^Code: 218|^Code: 473' | grep -v '(query: ' } thread1 & diff --git a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh index 71acc11b971..0ed9593c689 100755 --- a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh +++ b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh @@ -50,7 +50,7 @@ export -f thread2; export -f thread3; export -f thread4; -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c thread1 2> /dev/null & timeout $TIMEOUT bash -c thread2 2> /dev/null & diff --git a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh index 238cdcea547..ae728c8d10d 100755 --- a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh +++ b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: deadlock, no-parallel, no-debug +# Tags: deadlock, no-debug # NOTE: database = $CLICKHOUSE_DATABASE is unwanted @@ -49,7 +49,7 @@ function thread_select() export -f thread_drop_create export -f thread_select -TIMEOUT=60 +TIMEOUT=30 thread_drop_create $TIMEOUT & thread_select $TIMEOUT & diff --git a/tests/queries/0_stateless/00910_buffer_prewhere.sql b/tests/queries/0_stateless/00910_buffer_prewhere.sql index deda0db85fb..e6b1cc424ad 100644 --- a/tests/queries/0_stateless/00910_buffer_prewhere.sql +++ b/tests/queries/0_stateless/00910_buffer_prewhere.sql @@ -1,9 +1,4 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS test_buffer; -CREATE DATABASE test_buffer; -CREATE TABLE test_buffer.mt (uid UInt64, ts DateTime, val Float64) ENGINE = MergeTree PARTITION BY toDate(ts) ORDER BY (uid, ts); -CREATE TABLE test_buffer.buf as test_buffer.mt ENGINE = Buffer(test_buffer, mt, 2, 10, 60, 10000, 100000, 1000000, 10000000); -INSERT INTO test_buffer.buf VALUES (1, '2019-03-01 10:00:00', 0.5), (2, '2019-03-02 10:00:00', 0.15), (1, '2019-03-03 10:00:00', 0.25); -SELECT count() from test_buffer.buf prewhere ts > toDateTime('2019-03-01 12:00:00') and ts < toDateTime('2019-03-02 12:00:00'); -DROP DATABASE test_buffer; +CREATE TABLE mt (uid UInt64, ts DateTime, val Float64) ENGINE = MergeTree PARTITION BY toDate(ts) ORDER BY (uid, ts); +CREATE TABLE buf as mt ENGINE = Buffer({CLICKHOUSE_DATABASE:Identifier}, mt, 2, 10, 60, 10000, 100000, 1000000, 10000000); +INSERT INTO buf VALUES (1, '2019-03-01 10:00:00', 0.5), (2, '2019-03-02 10:00:00', 0.15), (1, '2019-03-03 10:00:00', 0.25); +SELECT count() from buf prewhere ts > toDateTime('2019-03-01 12:00:00') and ts < toDateTime('2019-03-02 12:00:00'); diff --git a/tests/queries/0_stateless/00938_template_input_format.sh b/tests/queries/0_stateless/00938_template_input_format.sh index be75edcdb61..016e662ea3b 100755 --- a/tests/queries/0_stateless/00938_template_input_format.sh +++ b/tests/queries/0_stateless/00938_template_input_format.sh @@ -1,12 +1,13 @@ #!/usr/bin/env bash -# Tags: no-parallel - # shellcheck disable=SC2016,SC2028 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +CURDIR=$CURDIR/${CLICKHOUSE_DATABASE} +mkdir -p $CURDIR + $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS template1"; $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS template2"; $CLICKHOUSE_CLIENT --query="CREATE TABLE template1 (s1 String, s2 String, s3 String, s4 String, n UInt64, d Date) ENGINE = Memory"; diff --git a/tests/queries/0_stateless/00967_ubsan_bit_test.sql b/tests/queries/0_stateless/00967_ubsan_bit_test.sql deleted file mode 100644 index 1682e725670..00000000000 --- a/tests/queries/0_stateless/00967_ubsan_bit_test.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT sum(ignore(bitTest(number, 65))) FROM numbers(10); diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index a05515cf756..407e124f137 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS mt; CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; diff --git a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh index d45cc3a6871..4887c409844 100755 --- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh +++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh @@ -41,7 +41,7 @@ function thread3() function thread4() { - while true; do $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table0 FINAL"; done + while true; do $CLICKHOUSE_CLIENT --receive_timeout=3 -q "OPTIMIZE TABLE alter_table0 FINAL" | grep -Fv "Timeout exceeded while receiving data from server"; done } function thread5() diff --git a/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh b/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh index 16ebf2e6e54..c3ad29d33a1 100755 --- a/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh +++ b/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh @@ -18,7 +18,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE table_with_empty_part ENGINE = MergeTree() ORDER BY id PARTITION BY id -SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0, remove_empty_parts = 0 +SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0, remove_empty_parts = 0, min_bytes_for_wide_part=0, min_bytes_for_full_part_storage = 0 " diff --git a/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh b/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh index 0a6888a5c69..3046fcbcd73 100755 --- a/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh +++ b/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -89,7 +89,7 @@ ${CLICKHOUSE_CLIENT} -n -q " " -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c recreate_lazy_func1 2> /dev/null & timeout $TIMEOUT bash -c recreate_lazy_func2 2> /dev/null & diff --git a/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh index 06a460f3600..0d57bb25543 100755 --- a/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-parallel, no-fasttest +# Tags: zookeeper, no-fasttest set -e @@ -61,7 +61,7 @@ export -f thread3; export -f thread4; export -f thread5; -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c thread1 2> /dev/null & timeout $TIMEOUT bash -c thread2 2> /dev/null & diff --git a/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql b/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql index 9040d7b3231..d0841124706 100644 --- a/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql +++ b/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql @@ -1,35 +1,25 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS dict_db_01036; -CREATE DATABASE dict_db_01036; - -CREATE TABLE dict_db_01036.dict_data (key UInt64, val UInt64) Engine=Memory(); -CREATE DICTIONARY dict_db_01036.dict +CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory(); +CREATE DICTIONARY dict ( key UInt64 DEFAULT 0, val UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01036')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; -SELECT dictGetUInt64('dict_db_01036.dict', 'val', toUInt64(0)); -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SELECT dictGetUInt64('dict', 'val', toUInt64(0)); +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; SELECT 'SYSTEM RELOAD DICTIONARY'; -SYSTEM RELOAD DICTIONARY dict_db_01036.dict; -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; -SELECT dictGetUInt64('dict_db_01036.dict', 'val', toUInt64(0)); -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; +SYSTEM RELOAD DICTIONARY dict; +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SELECT dictGetUInt64('dict', 'val', toUInt64(0)); +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; SELECT 'CREATE DATABASE'; DROP DATABASE IF EXISTS empty_db_01036; -CREATE DATABASE empty_db_01036; -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; - -DROP DICTIONARY dict_db_01036.dict; -DROP TABLE dict_db_01036.dict_data; -DROP DATABASE dict_db_01036; -DROP DATABASE empty_db_01036; +CREATE DATABASE IF NOT EXISTS empty_db_01036; +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.ans b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.ans index 6e31edbdd40..0a3f4123eb8 100644 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.ans +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.ans @@ -1,1000 +1,1000 @@ -dictGet test_01037.dict_array (29.5699,2.50068) 101 -dictGet test_01037.dict_array (29.5796,1.55456) 101 -dictGet test_01037.dict_array (29.5796,2.36864) 101 -dictGet test_01037.dict_array (29.5844,1.59626) 101 -dictGet test_01037.dict_array (29.5886,4.03321) 101 -dictGet test_01037.dict_array (29.5914,3.02628) 101 -dictGet test_01037.dict_array (29.5926,-0.0965169) 101 -dictGet test_01037.dict_array (29.5968,2.37773) 101 -dictGet test_01037.dict_array (29.5984,0.755853) 101 -dictGet test_01037.dict_array (29.6066,3.47173) 101 -dictGet test_01037.dict_array (29.6085,-1.26007) 101 -dictGet test_01037.dict_array (29.6131,0.246565) 101 -dictGet test_01037.dict_array (29.6157,-0.266687) 101 -dictGet test_01037.dict_array (29.6164,2.94674) 101 -dictGet test_01037.dict_array (29.6195,-0.591941) 101 -dictGet test_01037.dict_array (29.6231,1.54818) 101 -dictGet test_01037.dict_array (29.6379,0.764114) 101 -dictGet test_01037.dict_array (29.6462,-0.772059) 934570 -dictGet test_01037.dict_array (29.6579,-1.07336) 101 -dictGet test_01037.dict_array (29.6618,-0.271842) 101 -dictGet test_01037.dict_array (29.6629,-0.303602) 101 -dictGet test_01037.dict_array (29.6659,-0.782823) 934570 -dictGet test_01037.dict_array (29.6736,-0.113832) 101 -dictGet test_01037.dict_array (29.6759,3.02905) 101 -dictGet test_01037.dict_array (29.6778,3.71898) 101 -dictGet test_01037.dict_array (29.6796,1.10433) 101 -dictGet test_01037.dict_array (29.6809,2.13677) 101 -dictGet test_01037.dict_array (29.6935,4.11894) 101 -dictGet test_01037.dict_array (29.6991,-1.4458199999999999) 101 -dictGet test_01037.dict_array (29.6997,3.17297) 101 -dictGet test_01037.dict_array (29.7043,3.6145899999999997) 101 -dictGet test_01037.dict_array (29.7065,3.24885) 101 -dictGet test_01037.dict_array (29.7126,0.28108) 101 -dictGet test_01037.dict_array (29.7192,0.174273) 101 -dictGet test_01037.dict_array (29.7217,-0.523481) 934570 -dictGet test_01037.dict_array (29.7271,1.67967) 101 -dictGet test_01037.dict_array (29.7311,4.12444) 101 -dictGet test_01037.dict_array (29.7347,1.88378) 101 -dictGet test_01037.dict_array (29.7358,0.67944) 101 -dictGet test_01037.dict_array (29.7366,-0.2973) 101 -dictGet test_01037.dict_array (29.7446,0.646536) 101 -dictGet test_01037.dict_array (29.7453,-0.567963) 101 -dictGet test_01037.dict_array (29.764,4.04217) 101 -dictGet test_01037.dict_array (29.7655,1.51372) 101 -dictGet test_01037.dict_array (29.7744,1.12435) 101 -dictGet test_01037.dict_array (29.7774,-0.0681196) 101 -dictGet test_01037.dict_array (29.7784,1.54864) 101 -dictGet test_01037.dict_array (29.7785,2.24139) 101 -dictGet test_01037.dict_array (29.7922,0.220808) 101 -dictGet test_01037.dict_array (29.7936,2.37709) 101 -dictGet test_01037.dict_array (29.8008,0.948536) 101 -dictGet test_01037.dict_array (29.8115,0.201227) 101 -dictGet test_01037.dict_array (29.814,0.149601) 101 -dictGet test_01037.dict_array (29.8193,-1.35858) 101 -dictGet test_01037.dict_array (29.8201,0.965518) 101 -dictGet test_01037.dict_array (29.8265,-0.727286) 101 -dictGet test_01037.dict_array (29.8277,-0.531746) 101 -dictGet test_01037.dict_array (29.8289,3.63009) 101 -dictGet test_01037.dict_array (29.8548,0.838047) 101 -dictGet test_01037.dict_array (29.8641,-0.845265) 101 -dictGet test_01037.dict_array (29.8649,0.0562212) 101 -dictGet test_01037.dict_array (29.8701,-1.02045) 101 -dictGet test_01037.dict_array (29.8733,2.76654) 101 -dictGet test_01037.dict_array (29.876,0.555475) 101 -dictGet test_01037.dict_array (29.8794,-0.800108) 101 -dictGet test_01037.dict_array (29.8813,2.7426399999999997) 101 -dictGet test_01037.dict_array (29.897100000000002,2.66193) 101 -dictGet test_01037.dict_array (29.908,4.01339) 101 -dictGet test_01037.dict_array (29.9165,-1.08246) 101 -dictGet test_01037.dict_array (29.9201,-0.420861) 101 -dictGet test_01037.dict_array (29.9217,3.03778) 101 -dictGet test_01037.dict_array (29.9355,0.773833) 101 -dictGet test_01037.dict_array (29.947,3.76517) 101 -dictGet test_01037.dict_array (29.9518,-0.60557) 101 -dictGet test_01037.dict_array (29.9564,-0.600163) 101 -dictGet test_01037.dict_array (29.959600000000002,4.16591) 101 -dictGet test_01037.dict_array (29.9615,-1.33708) 101 -dictGet test_01037.dict_array (29.9699,-0.392375) 101 -dictGet test_01037.dict_array (29.9776,1.04552) 101 -dictGet test_01037.dict_array (29.9784,4.02756) 101 -dictGet test_01037.dict_array (29.9819,4.00597) 101 -dictGet test_01037.dict_array (29.9826,1.2816100000000001) 101 -dictGet test_01037.dict_array (30.0026,2.76257) 101 -dictGet test_01037.dict_array (30.0126,3.68255) 101 -dictGet test_01037.dict_array (30.0131,0.796576) 101 -dictGet test_01037.dict_array (30.018,1.16523) 101 -dictGet test_01037.dict_array (30.0261,-0.210653) 101 -dictGet test_01037.dict_array (30.0472,-1.11007) 101 -dictGet test_01037.dict_array (30.0542,-0.479585) 101 -dictGet test_01037.dict_array (30.0613,1.6278000000000001) 101 -dictGet test_01037.dict_array (30.0617,-0.0551152) 101 -dictGet test_01037.dict_array (30.0637,2.62066) 101 -dictGet test_01037.dict_array (30.0721,1.6424400000000001) 101 -dictGet test_01037.dict_array (30.0769,-0.402636) 101 -dictGet test_01037.dict_array (30.0791,-0.277435) 101 -dictGet test_01037.dict_array (30.0931,0.0327512) 101 -dictGet test_01037.dict_array (30.1059,3.52623) 101 -dictGet test_01037.dict_array (30.1103,0.865466) 101 -dictGet test_01037.dict_array (30.1115,2.95243) 101 -dictGet test_01037.dict_array (30.1144,1.71029) 101 -dictGet test_01037.dict_array (30.1311,-0.864751) 101 -dictGet test_01037.dict_array (30.1336,-0.851386) 101 -dictGet test_01037.dict_array (30.1393,3.89901) 101 -dictGet test_01037.dict_array (30.1456,-0.531898) 101 -dictGet test_01037.dict_array (30.1492,2.07833) 101 -dictGet test_01037.dict_array (30.1575,2.43856) 101 -dictGet test_01037.dict_array (30.1682,1.19771) 101 -dictGet test_01037.dict_array (30.1716,3.9853300000000003) 101 -dictGet test_01037.dict_array (30.1849,2.78374) 101 -dictGet test_01037.dict_array (30.1866,0.65658) 101 -dictGet test_01037.dict_array (30.1885,1.56943) 101 -dictGet test_01037.dict_array (30.1959,-1.38202) 101 -dictGet test_01037.dict_array (30.1999,1.58413) 101 -dictGet test_01037.dict_array (30.2024,0.713081) 101 -dictGet test_01037.dict_array (30.2054,0.620143) 101 -dictGet test_01037.dict_array (30.2091,1.51641) 101 -dictGet test_01037.dict_array (30.2124,-0.331782) 101 -dictGet test_01037.dict_array (30.226,3.03527) 101 -dictGet test_01037.dict_array (30.2261,3.18486) 101 -dictGet test_01037.dict_array (30.2288,2.48407) 101 -dictGet test_01037.dict_array (30.2345,3.7462400000000002) 101 -dictGet test_01037.dict_array (30.2375,0.62046) 101 -dictGet test_01037.dict_array (30.2425,-0.472914) 101 -dictGet test_01037.dict_array (30.247,3.95863) 101 -dictGet test_01037.dict_array (30.2494,-0.305093) 101 -dictGet test_01037.dict_array (30.2499,2.54337) 101 -dictGet test_01037.dict_array (30.2606,2.16644) 101 -dictGet test_01037.dict_array (30.2672,3.94847) 101 -dictGet test_01037.dict_array (30.2709,-0.136264) 101 -dictGet test_01037.dict_array (30.2764,1.18654) 101 -dictGet test_01037.dict_array (30.2765,1.20383) 101 -dictGet test_01037.dict_array (30.2839,1.05762) 101 -dictGet test_01037.dict_array (30.286,0.469327) 101 -dictGet test_01037.dict_array (30.2927,3.1693) 101 -dictGet test_01037.dict_array (30.2935,3.49854) 101 -dictGet test_01037.dict_array (30.307,0.312338) 101 -dictGet test_01037.dict_array (30.3085,1.07791) 101 -dictGet test_01037.dict_array (30.3139,2.77248) 101 -dictGet test_01037.dict_array (30.314,0.822823) 101 -dictGet test_01037.dict_array (30.3227,-0.587351) 101 -dictGet test_01037.dict_array (30.332,1.00174) 101 -dictGet test_01037.dict_array (30.3388,0.844148) 101 -dictGet test_01037.dict_array (30.3485,0.561902) 101 -dictGet test_01037.dict_array (30.3497,0.180362) 101 -dictGet test_01037.dict_array (30.361,4.13016) 101 -dictGet test_01037.dict_array (30.3623,-0.0484027) 101 -dictGet test_01037.dict_array (30.3638,3.9845800000000002) 101 -dictGet test_01037.dict_array (30.3853,3.16051) 101 -dictGet test_01037.dict_array (30.3974,2.6617800000000003) 101 -dictGet test_01037.dict_array (30.4002,-1.15886) 101 -dictGet test_01037.dict_array (30.4008,-0.387015) 101 -dictGet test_01037.dict_array (30.4018,1.86493) 101 -dictGet test_01037.dict_array (30.4239,1.16818) 101 -dictGet test_01037.dict_array (30.4363,3.63938) 101 -dictGet test_01037.dict_array (30.4377,-0.81315) 101 -dictGet test_01037.dict_array (30.4391,3.54703) 101 -dictGet test_01037.dict_array (30.4424,-1.39435) 101 -dictGet test_01037.dict_array (30.4441,2.8463000000000003) 101 -dictGet test_01037.dict_array (30.4517,3.28117) 101 -dictGet test_01037.dict_array (30.4658,2.6928) 101 -dictGet test_01037.dict_array (30.4734,2.66161) 101 -dictGet test_01037.dict_array (30.4799,-1.07578) 101 -dictGet test_01037.dict_array (30.4837,-1.02486) 101 -dictGet test_01037.dict_array (30.485,1.06326) 101 -dictGet test_01037.dict_array (30.495,1.12306) 101 -dictGet test_01037.dict_array (30.501,2.27264) 101 -dictGet test_01037.dict_array (30.5027,1.99382) 101 -dictGet test_01037.dict_array (30.5194,-1.03943) 101 -dictGet test_01037.dict_array (30.5239,1.04328) 101 -dictGet test_01037.dict_array (30.528,3.82041) 101 -dictGet test_01037.dict_array (30.5299,-0.715248) 101 -dictGet test_01037.dict_array (30.5331,1.19603) 101 -dictGet test_01037.dict_array (30.535800000000002,2.71485) 101 -dictGet test_01037.dict_array (30.5405,0.804694) 101 -dictGet test_01037.dict_array (30.542,1.23739) 101 -dictGet test_01037.dict_array (30.5432,4.04189) 101 -dictGet test_01037.dict_array (30.5457,-0.956121) 101 -dictGet test_01037.dict_array (30.5506,3.07443) 101 -dictGet test_01037.dict_array (30.5539,3.87084) 101 -dictGet test_01037.dict_array (30.5578,3.78837) 101 -dictGet test_01037.dict_array (30.5588,0.966135) 101 -dictGet test_01037.dict_array (30.5637,2.5605) 101 -dictGet test_01037.dict_array (30.5647,-1.27328) 101 -dictGet test_01037.dict_array (30.5656,-0.0581332) 101 -dictGet test_01037.dict_array (30.5715,0.65755) 101 -dictGet test_01037.dict_array (30.5727,3.01604) 101 -dictGet test_01037.dict_array (30.5729,-0.976857) 101 -dictGet test_01037.dict_array (30.5751,0.60204) 101 -dictGet test_01037.dict_array (30.5854,3.02473) 101 -dictGet test_01037.dict_array (30.5866,0.174099) 101 -dictGet test_01037.dict_array (30.5947,0.875193) 101 -dictGet test_01037.dict_array (30.5992,-0.403901) 101 -dictGet test_01037.dict_array (30.6002,4.18891) 101 -dictGet test_01037.dict_array (30.6025,0.217712) 101 -dictGet test_01037.dict_array (30.6054,0.927203) 101 -dictGet test_01037.dict_array (30.6075,3.79359) 101 -dictGet test_01037.dict_array (30.6159,3.82773) 101 -dictGet test_01037.dict_array (30.627,3.84039) 101 -dictGet test_01037.dict_array (30.6308,0.77517) 101 -dictGet test_01037.dict_array (30.6338,0.179565) 101 -dictGet test_01037.dict_array (30.6461,1.3293599999999999) 101 -dictGet test_01037.dict_array (30.6674,-0.424547) 101 -dictGet test_01037.dict_array (30.669,1.76539) 101 -dictGet test_01037.dict_array (30.6788,4.01239) 101 -dictGet test_01037.dict_array (30.6864,3.59158) 101 -dictGet test_01037.dict_array (30.7049,-0.875413) 101 -dictGet test_01037.dict_array (30.705,1.3307) 101 -dictGet test_01037.dict_array (30.7063,-0.473192) 101 -dictGet test_01037.dict_array (30.7075,-1.1958199999999999) 101 -dictGet test_01037.dict_array (30.7101,-0.367562) 101 -dictGet test_01037.dict_array (30.7203,2.98725) 101 -dictGet test_01037.dict_array (30.7213,2.2745699999999998) 101 -dictGet test_01037.dict_array (30.7446,-0.334144) 101 -dictGet test_01037.dict_array (30.7468,3.82967) 101 -dictGet test_01037.dict_array (30.747,-0.384779) 101 -dictGet test_01037.dict_array (30.7681,0.904198) 101 -dictGet test_01037.dict_array (30.7757,1.78743) 101 -dictGet test_01037.dict_array (30.8021,-0.479212) 101 -dictGet test_01037.dict_array (30.8079,-1.40869) 101 -dictGet test_01037.dict_array (30.8206,-0.0608489) 101 -dictGet test_01037.dict_array (30.8218,0.43909) 101 -dictGet test_01037.dict_array (30.8239,0.10014) 101 -dictGet test_01037.dict_array (30.8282,4.15409) 101 -dictGet test_01037.dict_array (30.8288,-0.709528) 101 -dictGet test_01037.dict_array (30.8326,0.156011) 101 -dictGet test_01037.dict_array (30.8328,-1.03704) 101 -dictGet test_01037.dict_array (30.839,2.15528) 101 -dictGet test_01037.dict_array (30.8452,0.219377) 101 -dictGet test_01037.dict_array (30.8463,0.0515355) 101 -dictGet test_01037.dict_array (30.8526,2.06614) 101 -dictGet test_01037.dict_array (30.8566,0.517876) 101 -dictGet test_01037.dict_array (30.8588,-1.31738) 101 -dictGet test_01037.dict_array (30.8681,0.44207) 101 -dictGet test_01037.dict_array (30.8914,1.0072) 101 -dictGet test_01037.dict_array (30.897,0.483425) 101 -dictGet test_01037.dict_array (30.905,2.8731999999999998) 101 -dictGet test_01037.dict_array (30.9051,2.21956) 101 -dictGet test_01037.dict_array (30.9115,4.00663) 101 -dictGet test_01037.dict_array (30.9167,-0.834462) 101 -dictGet test_01037.dict_array (30.9252,-1.3289900000000001) 101 -dictGet test_01037.dict_array (30.9314,1.85384) 101 -dictGet test_01037.dict_array (30.9392,2.53236) 101 -dictGet test_01037.dict_array (30.9569,2.82038) 101 -dictGet test_01037.dict_array (30.9598,-0.641011) 101 -dictGet test_01037.dict_array (30.9601,-0.254928) 101 -dictGet test_01037.dict_array (30.9623,-1.3886) 101 -dictGet test_01037.dict_array (30.9707,0.888854) 101 -dictGet test_01037.dict_array (30.9766,2.81957) 101 -dictGet test_01037.dict_array (30.9775,2.69273) 101 -dictGet test_01037.dict_array (30.9821,0.587715) 101 -dictGet test_01037.dict_array (30.9887,4.0233) 101 -dictGet test_01037.dict_array (30.9914,0.259542) 101 -dictGet test_01037.dict_array (30.9986,-1.36832) 101 -dictGet test_01037.dict_array (31.008,0.628999) 101 -dictGet test_01037.dict_array (31.0168,-1.17462) 101 -dictGet test_01037.dict_array (31.0237,3.52547) 101 -dictGet test_01037.dict_array (31.0306,3.78522) 101 -dictGet test_01037.dict_array (31.0308,-0.72453) 101 -dictGet test_01037.dict_array (31.0463,2.41997) 101 -dictGet test_01037.dict_array (31.047,0.624184) 101 -dictGet test_01037.dict_array (31.0569,0.0706393) 5994232 -dictGet test_01037.dict_array (31.0583,1.3244099999999999) 101 -dictGet test_01037.dict_array (31.063,3.23861) 101 -dictGet test_01037.dict_array (31.068,0.695575) 101 -dictGet test_01037.dict_array (31.0687,1.85675) 101 -dictGet test_01037.dict_array (31.0692,0.254793) 101 -dictGet test_01037.dict_array (31.0766,0.828128) 101 -dictGet test_01037.dict_array (31.0833,0.0612782) 5994232 -dictGet test_01037.dict_array (31.0833,2.59748) 101 -dictGet test_01037.dict_array (31.0861,-1.3778299999999999) 101 -dictGet test_01037.dict_array (31.0874,3.07258) 101 -dictGet test_01037.dict_array (31.0882,1.4882) 101 -dictGet test_01037.dict_array (31.0924,3.42242) 101 -dictGet test_01037.dict_array (31.0927,2.67448) 101 -dictGet test_01037.dict_array (31.0936,1.12292) 101 -dictGet test_01037.dict_array (31.0952,-0.336928) 101 -dictGet test_01037.dict_array (31.0978,3.48482) 101 -dictGet test_01037.dict_array (31.1107,3.7513199999999998) 101 -dictGet test_01037.dict_array (31.1156,1.19171) 101 -dictGet test_01037.dict_array (31.1176,0.223509) 5994232 -dictGet test_01037.dict_array (31.1249,0.946838) 101 -dictGet test_01037.dict_array (31.1267,1.48983) 101 -dictGet test_01037.dict_array (31.138,-0.289981) 101 -dictGet test_01037.dict_array (31.1382,3.02904) 101 -dictGet test_01037.dict_array (31.1475,2.6178) 101 -dictGet test_01037.dict_array (31.1491,1.37873) 101 -dictGet test_01037.dict_array (31.1525,3.72105) 101 -dictGet test_01037.dict_array (31.1526,-1.4129800000000001) 101 -dictGet test_01037.dict_array (31.1526,-0.186457) 101 -dictGet test_01037.dict_array (31.1539,2.78789) 101 -dictGet test_01037.dict_array (31.1548,-1.08552) 101 -dictGet test_01037.dict_array (31.1567,-0.0768925) 101 -dictGet test_01037.dict_array (31.1613,1.49617) 101 -dictGet test_01037.dict_array (31.1653,1.03777) 101 -dictGet test_01037.dict_array (31.1662,3.4214700000000002) 101 -dictGet test_01037.dict_array (31.1672,-0.0813169) 101 -dictGet test_01037.dict_array (31.177,0.440843) 101 -dictGet test_01037.dict_array (31.1788,-0.737151) 101 -dictGet test_01037.dict_array (31.1856,-0.144396) 101 -dictGet test_01037.dict_array (31.1959,3.66813) 101 -dictGet test_01037.dict_array (31.1996,-0.353983) 101 -dictGet test_01037.dict_array (31.2019,2.86802) 101 -dictGet test_01037.dict_array (31.2087,2.31245) 101 -dictGet test_01037.dict_array (31.2125,3.2713200000000002) 101 -dictGet test_01037.dict_array (31.2137,-0.108129) 101 -dictGet test_01037.dict_array (31.216,3.9156) 101 -dictGet test_01037.dict_array (31.2201,-0.202141) 101 -dictGet test_01037.dict_array (31.2285,2.09058) 101 -dictGet test_01037.dict_array (31.2502,4.01526) 101 -dictGet test_01037.dict_array (31.2585,3.11524) 101 -dictGet test_01037.dict_array (31.2645,-0.620418) 101 -dictGet test_01037.dict_array (31.2684,2.74277) 101 -dictGet test_01037.dict_array (31.2821,-1.12772) 101 -dictGet test_01037.dict_array (31.2821,2.46769) 101 -dictGet test_01037.dict_array (31.2887,3.91396) 101 -dictGet test_01037.dict_array (31.295,1.49942) 101 -dictGet test_01037.dict_array (31.2997,3.46122) 101 -dictGet test_01037.dict_array (31.3017,3.3263) 101 -dictGet test_01037.dict_array (31.3022,3.16754) 101 -dictGet test_01037.dict_array (31.3048,0.364962) 101 -dictGet test_01037.dict_array (31.305,3.1967) 101 -dictGet test_01037.dict_array (31.3061,1.84303) 101 -dictGet test_01037.dict_array (31.3082,-0.173851) 101 -dictGet test_01037.dict_array (31.3315,3.90932) 101 -dictGet test_01037.dict_array (31.3351,2.80164) 101 -dictGet test_01037.dict_array (31.3388,0.168765) 5994233 -dictGet test_01037.dict_array (31.339,0.25535) 101 -dictGet test_01037.dict_array (31.3423,1.7036799999999999) 101 -dictGet test_01037.dict_array (31.349,0.386456) 101 -dictGet test_01037.dict_array (31.3558,-1.04336) 101 -dictGet test_01037.dict_array (31.3564,0.478876) 101 -dictGet test_01037.dict_array (31.3607,-0.0860507) 5994233 -dictGet test_01037.dict_array (31.3831,3.84469) 101 -dictGet test_01037.dict_array (31.3886,-0.731137) 101 -dictGet test_01037.dict_array (31.4043,-0.348907) 101 -dictGet test_01037.dict_array (31.4081,1.47391) 101 -dictGet test_01037.dict_array (31.4176,-0.583645) 101 -dictGet test_01037.dict_array (31.4177,1.36972) 101 -dictGet test_01037.dict_array (31.4182,0.958303) 101 -dictGet test_01037.dict_array (31.4199,3.1738) 101 -dictGet test_01037.dict_array (31.4221,2.74876) 101 -dictGet test_01037.dict_array (31.4301,-0.122643) 5994233 -dictGet test_01037.dict_array (31.4344,1.00661) 101 -dictGet test_01037.dict_array (31.4375,4.20304) 101 -dictGet test_01037.dict_array (31.4377,0.289608) 101 -dictGet test_01037.dict_array (31.4379,0.54744) 101 -dictGet test_01037.dict_array (31.4459,3.94945) 101 -dictGet test_01037.dict_array (31.4559,-0.345063) 101 -dictGet test_01037.dict_array (31.464,0.726129) 101 -dictGet test_01037.dict_array (31.4662,-0.299019) 5994233 -dictGet test_01037.dict_array (31.4671,1.9605299999999999) 101 -dictGet test_01037.dict_array (31.4673,-0.403676) 101 -dictGet test_01037.dict_array (31.4712,-0.237941) 5994233 -dictGet test_01037.dict_array (31.4816,0.120264) 5994233 -dictGet test_01037.dict_array (31.4875,0.323483) 101 -dictGet test_01037.dict_array (31.490099999999998,-0.338163) 101 -dictGet test_01037.dict_array (31.4932,0.517674) 101 -dictGet test_01037.dict_array (31.5112,1.9689299999999998) 101 -dictGet test_01037.dict_array (31.5122,2.92785) 101 -dictGet test_01037.dict_array (31.5151,0.166429) 101 -dictGet test_01037.dict_array (31.5174,2.94802) 101 -dictGet test_01037.dict_array (31.5182,4.18776) 101 -dictGet test_01037.dict_array (31.5238,1.18793) 101 -dictGet test_01037.dict_array (31.5271,3.07446) 101 -dictGet test_01037.dict_array (31.5393,1.58061) 101 -dictGet test_01037.dict_array (31.5421,3.13711) 101 -dictGet test_01037.dict_array (31.5479,2.39897) 101 -dictGet test_01037.dict_array (31.5519,0.99285) 101 -dictGet test_01037.dict_array (31.5685,3.47987) 101 -dictGet test_01037.dict_array (31.5959,0.437382) 101 -dictGet test_01037.dict_array (31.6003,0.194376) 101 -dictGet test_01037.dict_array (31.6026,2.15457) 101 -dictGet test_01037.dict_array (31.606,2.45365) 101 -dictGet test_01037.dict_array (31.6062,-0.453441) 101 -dictGet test_01037.dict_array (31.6107,1.35247) 101 -dictGet test_01037.dict_array (31.6155,3.85588) 101 -dictGet test_01037.dict_array (31.6222,2.03326) 101 -dictGet test_01037.dict_array (31.6231,-0.123059) 101 -dictGet test_01037.dict_array (31.6244,1.6885599999999998) 101 -dictGet test_01037.dict_array (31.6459,0.669716) 101 -dictGet test_01037.dict_array (31.6563,-0.0644741) 101 -dictGet test_01037.dict_array (31.6618,-0.551121) 101 -dictGet test_01037.dict_array (31.6725,-0.38922) 101 -dictGet test_01037.dict_array (31.6727,4.10336) 101 -dictGet test_01037.dict_array (31.6739,4.1391) 101 -dictGet test_01037.dict_array (31.6897,2.8694699999999997) 101 -dictGet test_01037.dict_array (31.6902,3.98792) 101 -dictGet test_01037.dict_array (31.6945,2.46687) 101 -dictGet test_01037.dict_array (31.6987,-1.3796) 101 -dictGet test_01037.dict_array (31.7012,2.34845) 101 -dictGet test_01037.dict_array (31.7036,0.0228348) 101 -dictGet test_01037.dict_array (31.7046,3.68111) 101 -dictGet test_01037.dict_array (31.7055,2.92556) 101 -dictGet test_01037.dict_array (31.7102,1.04532) 101 -dictGet test_01037.dict_array (31.7149,-0.443302) 101 -dictGet test_01037.dict_array (31.7195,2.99311) 101 -dictGet test_01037.dict_array (31.7274,0.166719) 101 -dictGet test_01037.dict_array (31.7565,-0.565382) 101 -dictGet test_01037.dict_array (31.7615,0.771626) 101 -dictGet test_01037.dict_array (31.7739,1.8970099999999999) 101 -dictGet test_01037.dict_array (31.7848,1.2623199999999999) 101 -dictGet test_01037.dict_array (31.7912,-0.788599) 101 -dictGet test_01037.dict_array (31.8011,2.65853) 101 -dictGet test_01037.dict_array (31.8032,-0.0590108) 101 -dictGet test_01037.dict_array (31.8038,1.9618799999999998) 101 -dictGet test_01037.dict_array (31.8098,-1.46851) 101 -dictGet test_01037.dict_array (31.8131,3.41982) 101 -dictGet test_01037.dict_array (31.8169,3.31059) 101 -dictGet test_01037.dict_array (31.8202,-0.193692) 101 -dictGet test_01037.dict_array (31.8306,1.57586) 101 -dictGet test_01037.dict_array (31.8382,-0.787948) 101 -dictGet test_01037.dict_array (31.8433,2.49692) 101 -dictGet test_01037.dict_array (31.8436,2.41851) 101 -dictGet test_01037.dict_array (31.8563,-1.10787) 101 -dictGet test_01037.dict_array (31.8683,0.996504) 101 -dictGet test_01037.dict_array (31.8693,-0.828142) 101 -dictGet test_01037.dict_array (31.8723,1.08929) 101 -dictGet test_01037.dict_array (31.8737,0.881127) 101 -dictGet test_01037.dict_array (31.8881,-0.58441) 101 -dictGet test_01037.dict_array (31.9011,0.121349) 101 -dictGet test_01037.dict_array (31.9066,2.13045) 101 -dictGet test_01037.dict_array (31.9142,1.03368) 101 -dictGet test_01037.dict_array (31.9155,3.38363) 101 -dictGet test_01037.dict_array (31.9168,1.3166) 101 -dictGet test_01037.dict_array (31.9185,-1.11879) 101 -dictGet test_01037.dict_array (31.9186,-0.647948) 101 -dictGet test_01037.dict_array (31.9311,3.96928) 101 -dictGet test_01037.dict_array (31.9335,1.47048) 101 -dictGet test_01037.dict_array (31.9443,-1.36175) 101 -dictGet test_01037.dict_array (31.9481,2.34231) 101 -dictGet test_01037.dict_array (31.9526,1.36565) 101 -dictGet test_01037.dict_array (31.9629,2.5208399999999997) 101 -dictGet test_01037.dict_array (31.9765,0.975783) 101 -dictGet test_01037.dict_array (31.9923,3.31773) 101 -dictGet test_01037.dict_array (31.9994,0.972816) 101 -dictGet test_01037.dict_array (32.001,3.47425) 101 -dictGet test_01037.dict_array (32.0127,2.13874) 101 -dictGet test_01037.dict_array (32.0244,3.2092) 101 -dictGet test_01037.dict_array (32.029,1.18039) 101 -dictGet test_01037.dict_array (32.0315,0.566073) 101 -dictGet test_01037.dict_array (32.0354,1.0766499999999999) 101 -dictGet test_01037.dict_array (32.0399,-1.11576) 101 -dictGet test_01037.dict_array (32.053,2.16849) 101 -dictGet test_01037.dict_array (32.0542,0.042328) 101 -dictGet test_01037.dict_array (32.0576,2.47001) 101 -dictGet test_01037.dict_array (32.061,3.7498899999999997) 101 -dictGet test_01037.dict_array (32.0623,1.25134) 101 -dictGet test_01037.dict_array (32.0626,1.9611399999999999) 101 -dictGet test_01037.dict_array (32.0666,-0.0904247) 101 -dictGet test_01037.dict_array (32.0681,2.28442) 101 -dictGet test_01037.dict_array (32.0692,1.50869) 101 -dictGet test_01037.dict_array (32.0724,4.03314) 101 -dictGet test_01037.dict_array (32.0729,-0.064324) 101 -dictGet test_01037.dict_array (32.079,0.293758) 101 -dictGet test_01037.dict_array (32.0847,-1.19814) 101 -dictGet test_01037.dict_array (32.0974,-0.91927) 101 -dictGet test_01037.dict_array (32.0979,-0.736979) 101 -dictGet test_01037.dict_array (32.106,-1.33063) 101 -dictGet test_01037.dict_array (32.1189,0.246715) 101 -dictGet test_01037.dict_array (32.1207,4.00883) 101 -dictGet test_01037.dict_array (32.1396,1.12402) 101 -dictGet test_01037.dict_array (32.1413,1.5668) 101 -dictGet test_01037.dict_array (32.143,1.35559) 101 -dictGet test_01037.dict_array (32.1538,1.32881) 101 -dictGet test_01037.dict_array (32.1549,4.06552) 101 -dictGet test_01037.dict_array (32.1555,-0.79275) 101 -dictGet test_01037.dict_array (32.163,1.17733) 101 -dictGet test_01037.dict_array (32.1634,2.94273) 101 -dictGet test_01037.dict_array (32.1644,1.85666) 101 -dictGet test_01037.dict_array (32.1745,0.435458) 101 -dictGet test_01037.dict_array (32.1765,1.65149) 101 -dictGet test_01037.dict_array (32.1893,2.08924) 101 -dictGet test_01037.dict_array (32.2024,0.222191) 101 -dictGet test_01037.dict_array (32.2107,1.34379) 101 -dictGet test_01037.dict_array (32.2109,3.9018699999999997) 101 -dictGet test_01037.dict_array (32.2123,1.85233) 101 -dictGet test_01037.dict_array (32.2144,3.72534) 101 -dictGet test_01037.dict_array (32.2218,2.5386699999999998) 101 -dictGet test_01037.dict_array (32.2279,2.84267) 101 -dictGet test_01037.dict_array (32.2345,3.33295) 101 -dictGet test_01037.dict_array (32.2435,3.85283) 101 -dictGet test_01037.dict_array (32.2527,-0.480608) 101 -dictGet test_01037.dict_array (32.2566,-0.837882) 101 -dictGet test_01037.dict_array (32.2627,2.57708) 101 -dictGet test_01037.dict_array (32.2733,0.244931) 101 -dictGet test_01037.dict_array (32.2761,4.05808) 101 -dictGet test_01037.dict_array (32.2764,3.78472) 101 -dictGet test_01037.dict_array (32.2814,-1.26011) 101 -dictGet test_01037.dict_array (32.2861,3.02427) 101 -dictGet test_01037.dict_array (32.2924,0.928609) 101 -dictGet test_01037.dict_array (32.2963,-0.78543) 101 -dictGet test_01037.dict_array (32.3039,3.21175) 101 -dictGet test_01037.dict_array (32.3107,0.698287) 101 -dictGet test_01037.dict_array (32.3138,0.0595677) 101 -dictGet test_01037.dict_array (32.3339,0.707056) 101 -dictGet test_01037.dict_array (32.3351,0.415474) 101 -dictGet test_01037.dict_array (32.342,-0.681023) 101 -dictGet test_01037.dict_array (32.3463,1.83196) 101 -dictGet test_01037.dict_array (32.3494,2.43799) 101 -dictGet test_01037.dict_array (32.3524,3.47049) 101 -dictGet test_01037.dict_array (32.3531,2.33115) 101 -dictGet test_01037.dict_array (32.3602,0.116106) 101 -dictGet test_01037.dict_array (32.3612,1.1598) 101 -dictGet test_01037.dict_array (32.3689,3.34847) 101 -dictGet test_01037.dict_array (32.3695,0.734055) 101 -dictGet test_01037.dict_array (32.3825,3.85017) 101 -dictGet test_01037.dict_array (32.3835,-1.25491) 101 -dictGet test_01037.dict_array (32.4018,-0.728568) 101 -dictGet test_01037.dict_array (32.4044,2.96727) 101 -dictGet test_01037.dict_array (32.4101,2.9988) 101 -dictGet test_01037.dict_array (32.417,-1.12908) 101 -dictGet test_01037.dict_array (32.4172,4.1952) 101 -dictGet test_01037.dict_array (32.4239,2.49512) 101 -dictGet test_01037.dict_array (32.4258,4.05137) 101 -dictGet test_01037.dict_array (32.4264,-0.427357) 101 -dictGet test_01037.dict_array (32.4274,3.59377) 101 -dictGet test_01037.dict_array (32.4286,-1.24757) 101 -dictGet test_01037.dict_array (32.4294,3.0665) 101 -dictGet test_01037.dict_array (32.4333,-0.353347) 101 -dictGet test_01037.dict_array (32.4391,3.64421) 101 -dictGet test_01037.dict_array (32.4401,3.70635) 101 -dictGet test_01037.dict_array (32.45,1.68918) 101 -dictGet test_01037.dict_array (32.4507,-0.133471) 101 -dictGet test_01037.dict_array (32.4592,0.976458) 101 -dictGet test_01037.dict_array (32.4595,1.89135) 101 -dictGet test_01037.dict_array (32.4604,0.280248) 101 -dictGet test_01037.dict_array (32.4835,0.472731) 101 -dictGet test_01037.dict_array (32.4855,2.01938) 101 -dictGet test_01037.dict_array (32.4872,2.01697) 101 -dictGet test_01037.dict_array (32.4911,0.613106) 101 -dictGet test_01037.dict_array (32.4918,2.17834) 101 -dictGet test_01037.dict_array (32.4947,2.34595) 101 -dictGet test_01037.dict_array (32.5035,2.92234) 101 -dictGet test_01037.dict_array (32.5132,-0.331206) 101 -dictGet test_01037.dict_array (32.5156,-0.412604) 7652581 -dictGet test_01037.dict_array (32.5158,2.9067499999999997) 101 -dictGet test_01037.dict_array (32.5249,2.44519) 101 -dictGet test_01037.dict_array (32.5293,-0.790952) 101 -dictGet test_01037.dict_array (32.5319,3.96854) 101 -dictGet test_01037.dict_array (32.5518,3.6093) 101 -dictGet test_01037.dict_array (32.5541,3.5225400000000002) 101 -dictGet test_01037.dict_array (32.5569,0.816123) 101 -dictGet test_01037.dict_array (32.5646,1.9775) 101 -dictGet test_01037.dict_array (32.5733,3.81271) 101 -dictGet test_01037.dict_array (32.5767,0.948327) 101 -dictGet test_01037.dict_array (32.5971,1.76179) 101 -dictGet test_01037.dict_array (32.6035,-0.716157) 101 -dictGet test_01037.dict_array (32.6087,4.21614) 101 -dictGet test_01037.dict_array (32.6171,0.024481) 101 -dictGet test_01037.dict_array (32.6189,-0.775391) 101 -dictGet test_01037.dict_array (32.6198,2.92081) 101 -dictGet test_01037.dict_array (32.621,-0.970784) 101 -dictGet test_01037.dict_array (32.6266,0.650009) 101 -dictGet test_01037.dict_array (32.6315,2.15144) 101 -dictGet test_01037.dict_array (32.6385,-0.436803) 101 -dictGet test_01037.dict_array (32.6449,-0.191292) 101 -dictGet test_01037.dict_array (32.6535,2.10385) 101 -dictGet test_01037.dict_array (32.6592,3.49973) 101 -dictGet test_01037.dict_array (32.6598,2.5980600000000003) 101 -dictGet test_01037.dict_array (32.6612,2.95681) 101 -dictGet test_01037.dict_array (32.6636,-0.57235) 101 -dictGet test_01037.dict_array (32.669,-0.382702) 101 -dictGet test_01037.dict_array (32.6752,1.30748) 101 -dictGet test_01037.dict_array (32.6811,2.9559800000000003) 101 -dictGet test_01037.dict_array (32.6821,0.57336) 101 -dictGet test_01037.dict_array (32.6828,3.91304) 101 -dictGet test_01037.dict_array (32.6979,3.96868) 101 -dictGet test_01037.dict_array (32.6983,3.15784) 101 -dictGet test_01037.dict_array (32.7122,0.794293) 101 -dictGet test_01037.dict_array (32.7131,-0.847256) 101 -dictGet test_01037.dict_array (32.7219,0.883461) 101 -dictGet test_01037.dict_array (32.7228,1.78808) 101 -dictGet test_01037.dict_array (32.7273,-0.206908) 101 -dictGet test_01037.dict_array (32.7292,0.259331) 101 -dictGet test_01037.dict_array (32.7304,-1.38317) 101 -dictGet test_01037.dict_array (32.7353,1.01601) 101 -dictGet test_01037.dict_array (32.7354,4.17574) 101 -dictGet test_01037.dict_array (32.7357,-0.190194) 101 -dictGet test_01037.dict_array (32.7465,-1.37598) 101 -dictGet test_01037.dict_array (32.7494,-0.275675) 101 -dictGet test_01037.dict_array (32.7514,0.128951) 101 -dictGet test_01037.dict_array (32.753,3.44207) 101 -dictGet test_01037.dict_array (32.7686,2.11713) 101 -dictGet test_01037.dict_array (32.7694,1.47159) 101 -dictGet test_01037.dict_array (32.7768,0.0401042) 101 -dictGet test_01037.dict_array (32.781,-1.34283) 101 -dictGet test_01037.dict_array (32.7814,1.73876) 101 -dictGet test_01037.dict_array (32.7856,-1.06363) 101 -dictGet test_01037.dict_array (32.792699999999996,-1.1255600000000001) 101 -dictGet test_01037.dict_array (32.7941,-0.645447) 101 -dictGet test_01037.dict_array (32.7946,1.48889) 101 -dictGet test_01037.dict_array (32.797,0.791753) 101 -dictGet test_01037.dict_array (32.7982,-0.537798) 101 -dictGet test_01037.dict_array (32.8091,2.3611) 101 -dictGet test_01037.dict_array (32.81,1.7130800000000002) 101 -dictGet test_01037.dict_array (32.8174,-0.288322) 101 -dictGet test_01037.dict_array (32.823,1.6546699999999999) 101 -dictGet test_01037.dict_array (32.8233,1.62108) 101 -dictGet test_01037.dict_array (32.8428,-0.400045) 101 -dictGet test_01037.dict_array (32.8479,2.13598) 101 -dictGet test_01037.dict_array (32.8524,0.199902) 101 -dictGet test_01037.dict_array (32.8543,3.23553) 101 -dictGet test_01037.dict_array (32.8562,1.31371) 101 -dictGet test_01037.dict_array (32.87,1.44256) 101 -dictGet test_01037.dict_array (32.8789,2.38192) 101 -dictGet test_01037.dict_array (32.8812,2.20734) 5999168 -dictGet test_01037.dict_array (32.8815,-0.54427) 101 -dictGet test_01037.dict_array (32.8853,2.4859) 5999168 -dictGet test_01037.dict_array (32.8909,0.513964) 101 -dictGet test_01037.dict_array (32.9035,2.38999) 101 -dictGet test_01037.dict_array (32.9097,2.48131) 5999168 -dictGet test_01037.dict_array (32.928,-0.943269) 101 -dictGet test_01037.dict_array (32.9322,1.13165) 101 -dictGet test_01037.dict_array (32.9348,1.22606) 101 -dictGet test_01037.dict_array (32.9417,3.77998) 101 -dictGet test_01037.dict_array (32.9428,3.11936) 101 -dictGet test_01037.dict_array (32.9482,1.18092) 101 -dictGet test_01037.dict_array (32.9506,0.0609364) 101 -dictGet test_01037.dict_array (32.953,-0.828308) 101 -dictGet test_01037.dict_array (32.9593,3.5209099999999998) 101 -dictGet test_01037.dict_array (32.9617,2.07711) 5999168 -dictGet test_01037.dict_array (32.966,0.693749) 101 -dictGet test_01037.dict_array (32.9668,-0.716432) 101 -dictGet test_01037.dict_array (32.9702,1.98555) 101 -dictGet test_01037.dict_array (32.9782,1.73819) 101 -dictGet test_01037.dict_array (32.9805,3.71151) 101 -dictGet test_01037.dict_array (32.9821,2.97225) 101 -dictGet test_01037.dict_array (32.995,-0.830301) 101 -dictGet test_01037.dict_array (33.0234,0.770848) 101 -dictGet test_01037.dict_array (33.0312,-0.340964) 101 -dictGet test_01037.dict_array (33.0366,-0.756795) 101 -dictGet test_01037.dict_array (33.0438,0.812871) 101 -dictGet test_01037.dict_array (33.0455,1.84843) 101 -dictGet test_01037.dict_array (33.0498,0.0913292) 101 -dictGet test_01037.dict_array (33.0506,1.53739) 101 -dictGet test_01037.dict_array (33.0554,2.4265) 101 -dictGet test_01037.dict_array (33.0741,3.61332) 101 -dictGet test_01037.dict_array (33.0765,-0.179985) 101 -dictGet test_01037.dict_array (33.087,1.46465) 101 -dictGet test_01037.dict_array (33.0906,-0.620383) 101 -dictGet test_01037.dict_array (33.1047,-1.28027) 101 -dictGet test_01037.dict_array (33.1072,1.96303) 101 -dictGet test_01037.dict_array (33.1081,-0.897874) 101 -dictGet test_01037.dict_array (33.1122,1.8950200000000001) 101 -dictGet test_01037.dict_array (33.1237,2.63993) 101 -dictGet test_01037.dict_array (33.1238,0.753963) 101 -dictGet test_01037.dict_array (33.1257,0.495668) 101 -dictGet test_01037.dict_array (33.1258,1.78341) 101 -dictGet test_01037.dict_array (33.127,2.59646) 101 -dictGet test_01037.dict_array (33.1324,-1.23742) 101 -dictGet test_01037.dict_array (33.1359,3.83491) 101 -dictGet test_01037.dict_array (33.1628,-0.379588) 101 -dictGet test_01037.dict_array (33.1679,1.25601) 101 -dictGet test_01037.dict_array (33.1688,-1.35553) 101 -dictGet test_01037.dict_array (33.181,2.10943) 101 -dictGet test_01037.dict_array (33.1871,2.81171) 101 -dictGet test_01037.dict_array (33.1877,0.771297) 101 -dictGet test_01037.dict_array (33.1883,-0.204797) 101 -dictGet test_01037.dict_array (33.1886,3.27998) 101 -dictGet test_01037.dict_array (33.1955,0.708907) 101 -dictGet test_01037.dict_array (33.2044,-0.769275) 101 -dictGet test_01037.dict_array (33.2182,3.36103) 101 -dictGet test_01037.dict_array (33.2192,3.43586) 101 -dictGet test_01037.dict_array (33.2322,-0.916753) 101 -dictGet test_01037.dict_array (33.2359,-0.81321) 101 -dictGet test_01037.dict_array (33.238,0.635072) 101 -dictGet test_01037.dict_array (33.2398,3.02588) 101 -dictGet test_01037.dict_array (33.2469,2.35698) 101 -dictGet test_01037.dict_array (33.247,2.3327) 101 -dictGet test_01037.dict_array (33.2579,2.8027100000000003) 101 -dictGet test_01037.dict_array (33.2607,0.321082) 101 -dictGet test_01037.dict_array (33.2653,0.243336) 101 -dictGet test_01037.dict_array (33.2758,0.831836) 101 -dictGet test_01037.dict_array (33.2771,0.886536) 101 -dictGet test_01037.dict_array (33.2914,1.16026) 101 -dictGet test_01037.dict_array (33.2914,1.38882) 101 -dictGet test_01037.dict_array (33.2982,-1.16604) 101 -dictGet test_01037.dict_array (33.2985,0.842556) 101 -dictGet test_01037.dict_array (33.3005,2.8338900000000002) 101 -dictGet test_01037.dict_array (33.305,0.0969475) 101 -dictGet test_01037.dict_array (33.3072,3.82163) 101 -dictGet test_01037.dict_array (33.312,3.41475) 101 -dictGet test_01037.dict_array (33.3129,2.46048) 101 -dictGet test_01037.dict_array (33.3134,3.46863) 101 -dictGet test_01037.dict_array (33.3203,2.33139) 101 -dictGet test_01037.dict_array (33.324,0.433701) 101 -dictGet test_01037.dict_array (33.3338,2.44705) 101 -dictGet test_01037.dict_array (33.337,4.06475) 101 -dictGet test_01037.dict_array (33.3469,1.08172) 101 -dictGet test_01037.dict_array (33.3538,0.717896) 101 -dictGet test_01037.dict_array (33.3618,1.37899) 101 -dictGet test_01037.dict_array (33.3698,0.547744) 101 -dictGet test_01037.dict_array (33.3705,0.957619) 101 -dictGet test_01037.dict_array (33.3821,3.07258) 101 -dictGet test_01037.dict_array (33.3881,3.0626) 101 -dictGet test_01037.dict_array (33.393,-0.816186) 101 -dictGet test_01037.dict_array (33.3945,0.869508) 101 -dictGet test_01037.dict_array (33.4001,1.24186) 101 -dictGet test_01037.dict_array (33.4008,2.34911) 101 -dictGet test_01037.dict_array (33.4166,-1.2808899999999999) 101 -dictGet test_01037.dict_array (33.4167,3.0655) 101 -dictGet test_01037.dict_array (33.4204,2.81887) 101 -dictGet test_01037.dict_array (33.4211,1.71128) 101 -dictGet test_01037.dict_array (33.4237,2.91761) 101 -dictGet test_01037.dict_array (33.4266,1.5955599999999999) 101 -dictGet test_01037.dict_array (33.4353,-0.391392) 101 -dictGet test_01037.dict_array (33.4362,-0.134658) 101 -dictGet test_01037.dict_array (33.4386,0.15396) 101 -dictGet test_01037.dict_array (33.4421,-0.50712) 101 -dictGet test_01037.dict_array (33.452,0.915829) 101 -dictGet test_01037.dict_array (33.463,-0.0882717) 101 -dictGet test_01037.dict_array (33.464,-1.00949) 101 -dictGet test_01037.dict_array (33.4692,0.954092) 101 -dictGet test_01037.dict_array (33.4716,1.9538799999999998) 101 -dictGet test_01037.dict_array (33.4756,1.85836) 101 -dictGet test_01037.dict_array (33.4859,4.0751) 101 -dictGet test_01037.dict_array (33.4899,3.54193) 101 -dictGet test_01037.dict_array (33.4935,3.49794) 101 -dictGet test_01037.dict_array (33.494,-0.983356) 101 -dictGet test_01037.dict_array (33.4955,-1.28128) 101 -dictGet test_01037.dict_array (33.4965,-0.278687) 101 -dictGet test_01037.dict_array (33.4991,0.647491) 101 -dictGet test_01037.dict_array (33.5076,2.2272) 101 -dictGet test_01037.dict_array (33.5079,-0.498199) 101 -dictGet test_01037.dict_array (33.5157,0.535034) 101 -dictGet test_01037.dict_array (33.5171,2.49677) 101 -dictGet test_01037.dict_array (33.5255,2.4447200000000002) 101 -dictGet test_01037.dict_array (33.526,4.01194) 101 -dictGet test_01037.dict_array (33.5288,0.789434) 101 -dictGet test_01037.dict_array (33.5356,-1.17671) 101 -dictGet test_01037.dict_array (33.5402,1.49152) 101 -dictGet test_01037.dict_array (33.5418,3.45757) 101 -dictGet test_01037.dict_array (33.5428,1.90712) 101 -dictGet test_01037.dict_array (33.5556,-0.55741) 101 -dictGet test_01037.dict_array (33.5564,0.876858) 101 -dictGet test_01037.dict_array (33.5567,-0.10208) 101 -dictGet test_01037.dict_array (33.5645,-0.124824) 101 -dictGet test_01037.dict_array (33.5663,3.4872) 101 -dictGet test_01037.dict_array (33.5716,-0.0107611) 101 -dictGet test_01037.dict_array (33.578,3.55714) 101 -dictGet test_01037.dict_array (33.5826,-0.49076) 101 -dictGet test_01037.dict_array (33.5909,0.773737) 101 -dictGet test_01037.dict_array (33.5958,2.9619999999999997) 5994231 -dictGet test_01037.dict_array (33.6193,-0.919755) 101 -dictGet test_01037.dict_array (33.6313,0.652132) 101 -dictGet test_01037.dict_array (33.632,0.823351) 101 -dictGet test_01037.dict_array (33.66,2.18998) 101 -dictGet test_01037.dict_array (33.6621,0.535395) 101 -dictGet test_01037.dict_array (33.6726,3.19367) 101 -dictGet test_01037.dict_array (33.6912,1.74522) 101 -dictGet test_01037.dict_array (33.705,0.706397) 101 -dictGet test_01037.dict_array (33.7076,0.7622) 101 -dictGet test_01037.dict_array (33.7112,1.70187) 101 -dictGet test_01037.dict_array (33.7246,-1.14837) 101 -dictGet test_01037.dict_array (33.7326,2.62413) 5994231 -dictGet test_01037.dict_array (33.7332,2.82137) 5994231 -dictGet test_01037.dict_array (33.7434,0.394672) 101 -dictGet test_01037.dict_array (33.7443,1.54557) 101 -dictGet test_01037.dict_array (33.7506,1.57317) 101 -dictGet test_01037.dict_array (33.7526,1.8578999999999999) 101 -dictGet test_01037.dict_array (33.766,4.15013) 101 -dictGet test_01037.dict_array (33.7834,2.41789) 101 -dictGet test_01037.dict_array (33.7864,0.230935) 101 -dictGet test_01037.dict_array (33.7965,3.05709) 101 -dictGet test_01037.dict_array (33.7998,3.32881) 101 -dictGet test_01037.dict_array (33.8003,2.97338) 5994231 -dictGet test_01037.dict_array (33.8007,-1.08962) 101 -dictGet test_01037.dict_array (33.8022,-0.139488) 101 -dictGet test_01037.dict_array (33.8065,2.70857) 5994231 -dictGet test_01037.dict_array (33.8169,-0.607788) 101 -dictGet test_01037.dict_array (33.8203,0.108512) 101 -dictGet test_01037.dict_array (33.8231,-1.03449) 101 -dictGet test_01037.dict_array (33.8312,3.49458) 101 -dictGet test_01037.dict_array (33.8342,0.297518) 101 -dictGet test_01037.dict_array (33.8352,0.165872) 101 -dictGet test_01037.dict_array (33.8354,1.87277) 101 -dictGet test_01037.dict_array (33.8371,1.60103) 101 -dictGet test_01037.dict_array (33.8387,1.9968) 101 -dictGet test_01037.dict_array (33.8403,3.5805) 101 -dictGet test_01037.dict_array (33.8414,-0.703067) 101 -dictGet test_01037.dict_array (33.844,-0.179472) 101 -dictGet test_01037.dict_array (33.8468,3.40137) 101 -dictGet test_01037.dict_array (33.8509,4.15334) 101 -dictGet test_01037.dict_array (33.8539,2.38339) 101 -dictGet test_01037.dict_array (33.858,-1.3122500000000001) 101 -dictGet test_01037.dict_array (33.859,3.72626) 101 -dictGet test_01037.dict_array (33.8616,2.24433) 101 -dictGet test_01037.dict_array (33.8621,3.01035) 101 -dictGet test_01037.dict_array (33.8623,1.17559) 101 -dictGet test_01037.dict_array (33.8682,2.706) 5994231 -dictGet test_01037.dict_array (33.8684,0.189231) 101 -dictGet test_01037.dict_array (33.872,1.93574) 101 -dictGet test_01037.dict_array (33.8844,3.80404) 101 -dictGet test_01037.dict_array (33.8888,0.594884) 101 -dictGet test_01037.dict_array (33.8946,2.74161) 101 -dictGet test_01037.dict_array (33.9023,0.6239) 101 -dictGet test_01037.dict_array (33.9057,0.873222) 101 -dictGet test_01037.dict_array (33.9157,-1.26607) 101 -dictGet test_01037.dict_array (33.92,2.06848) 101 -dictGet test_01037.dict_array (33.9298,-0.00526229) 101 -dictGet test_01037.dict_array (33.932,3.07063) 101 -dictGet test_01037.dict_array (33.9322,0.629385) 101 -dictGet test_01037.dict_array (33.9367,-1.41955) 101 -dictGet test_01037.dict_array (33.937,1.42532) 101 -dictGet test_01037.dict_array (33.9375,1.1467100000000001) 101 -dictGet test_01037.dict_array (33.9434,-1.05739) 101 -dictGet test_01037.dict_array (33.9477,3.34809) 101 -dictGet test_01037.dict_array (33.95,2.21715) 101 -dictGet test_01037.dict_array (33.955799999999996,0.305176) 101 -dictGet test_01037.dict_array (33.9686,-0.28273) 101 -dictGet test_01037.dict_array (33.9703,4.1255) 101 -dictGet test_01037.dict_array (33.9707,3.08199) 101 -dictGet test_01037.dict_array (33.9754,1.06203) 101 -dictGet test_01037.dict_array (33.9757,3.72468) 101 -dictGet test_01037.dict_array (33.9775,-0.0440599) 101 -dictGet test_01037.dict_array (33.9777,-0.251484) 101 -dictGet test_01037.dict_array (33.9789,-0.339374) 101 -dictGet test_01037.dict_array (33.9849,2.54515) 5994231 -dictGet test_01037.dict_array (33.9885,-0.318557) 101 -dictGet test_01037.dict_array (33.9977,1.07175) 101 -dictGet test_01037.dict_array (33.9984,-0.700517) 101 -dictGet test_01037.dict_array (34.0149,3.53338) 101 -dictGet test_01037.dict_array (34.0173,3.39155) 101 -dictGet test_01037.dict_array (34.0317,3.9579) 101 -dictGet test_01037.dict_array (34.0369,3.83612) 101 -dictGet test_01037.dict_array (34.043,-0.0887221) 101 -dictGet test_01037.dict_array (34.0487,1.14252) 101 -dictGet test_01037.dict_array (34.052,1.74832) 101 -dictGet test_01037.dict_array (34.0711,-0.898071) 101 -dictGet test_01037.dict_array (34.0747,1.55057) 101 -dictGet test_01037.dict_array (34.0803,3.16763) 101 -dictGet test_01037.dict_array (34.0872,3.75555) 101 -dictGet test_01037.dict_array (34.0965,1.62038) 101 -dictGet test_01037.dict_array (34.0977,-0.412691) 101 -dictGet test_01037.dict_array (34.0986,0.0294206) 101 -dictGet test_01037.dict_array (34.1072,3.15823) 101 -dictGet test_01037.dict_array (34.1092,3.09599) 101 -dictGet test_01037.dict_array (34.1206,1.04637) 5940222 -dictGet test_01037.dict_array (34.1209,3.13826) 101 -dictGet test_01037.dict_array (34.1265,3.95881) 101 -dictGet test_01037.dict_array (34.1286,-0.539319) 101 -dictGet test_01037.dict_array (34.1358,3.67451) 101 -dictGet test_01037.dict_array (34.1428,0.136115) 101 -dictGet test_01037.dict_array (34.157,1.73522) 101 -dictGet test_01037.dict_array (34.1581,1.48001) 101 -dictGet test_01037.dict_array (34.1682,3.42373) 101 -dictGet test_01037.dict_array (34.1683,-1.26511) 101 -dictGet test_01037.dict_array (34.1684,4.20007) 101 -dictGet test_01037.dict_array (34.1854,3.32089) 101 -dictGet test_01037.dict_array (34.2022,0.749536) 101 -dictGet test_01037.dict_array (34.2044,3.04865) 101 -dictGet test_01037.dict_array (34.22,-0.500055) 101 -dictGet test_01037.dict_array (34.2249,0.743775) 101 -dictGet test_01037.dict_array (34.2254,1.34702) 101 -dictGet test_01037.dict_array (34.2355,-0.898843) 101 -dictGet test_01037.dict_array (34.2394,2.0203699999999998) 101 -dictGet test_01037.dict_array (34.2466,1.83785) 101 -dictGet test_01037.dict_array (34.247,4.09563) 101 -dictGet test_01037.dict_array (34.2508,2.61312) 101 -dictGet test_01037.dict_array (34.2517,1.69642) 101 -dictGet test_01037.dict_array (34.2564,4.13033) 101 -dictGet test_01037.dict_array (34.2574,4.18928) 101 -dictGet test_01037.dict_array (34.2614,-0.478719) 101 -dictGet test_01037.dict_array (34.2625,2.38088) 101 -dictGet test_01037.dict_array (34.2666,3.1503) 101 -dictGet test_01037.dict_array (34.271,4.02223) 101 -dictGet test_01037.dict_array (34.2727,0.514755) 101 -dictGet test_01037.dict_array (34.278,1.98929) 101 -dictGet test_01037.dict_array (34.2798,-0.199208) 101 -dictGet test_01037.dict_array (34.2804,2.05184) 101 -dictGet test_01037.dict_array (34.2945,-1.11051) 101 -dictGet test_01037.dict_array (34.3168,-0.0829721) 101 -dictGet test_01037.dict_array (34.3345,3.4358) 101 -dictGet test_01037.dict_array (34.3377,1.13527) 5940222 -dictGet test_01037.dict_array (34.3383,1.27891) 5940222 -dictGet test_01037.dict_array (34.3391,1.47945) 5940222 -dictGet test_01037.dict_array (34.3441,0.627014) 101 -dictGet test_01037.dict_array (34.347,2.4853) 101 -dictGet test_01037.dict_array (34.3514,2.16247) 101 -dictGet test_01037.dict_array (34.3627,2.64533) 101 -dictGet test_01037.dict_array (34.3682,-0.227501) 101 -dictGet test_01037.dict_array (34.3756,4.21248) 101 -dictGet test_01037.dict_array (34.379,3.96604) 101 -dictGet test_01037.dict_array (34.3827,1.7518) 101 -dictGet test_01037.dict_array (34.3912,2.8834) 101 -dictGet test_01037.dict_array (34.3919,0.668829) 101 -dictGet test_01037.dict_array (34.3949,2.00338) 101 -dictGet test_01037.dict_array (34.3987,0.557268) 101 -dictGet test_01037.dict_array (34.4111,0.768558) 101 -dictGet test_01037.dict_array (34.4119,2.8742) 101 -dictGet test_01037.dict_array (34.416,3.50841) 101 -dictGet test_01037.dict_array (34.4212,1.24916) 5940222 -dictGet test_01037.dict_array (34.4251,0.457029) 101 -dictGet test_01037.dict_array (34.4274,-0.902559) 101 -dictGet test_01037.dict_array (34.4325,4.03159) 101 -dictGet test_01037.dict_array (34.438,1.63994) 101 -dictGet test_01037.dict_array (34.4403,-0.177594) 101 -dictGet test_01037.dict_array (34.4421,0.726712) 101 -dictGet test_01037.dict_array (34.4517,2.98611) 101 -dictGet test_01037.dict_array (34.4658,-1.312) 101 -dictGet test_01037.dict_array (34.4732,-0.0681338) 101 -dictGet test_01037.dict_array (34.4752,2.81646) 101 -dictGet test_01037.dict_array (34.4914,2.3858) 101 -dictGet test_01037.dict_array (34.4923,0.855231) 101 -dictGet test_01037.dict_array (34.5235,1.78468) 101 -dictGet test_01037.dict_array (34.5305,4.10608) 101 -dictGet test_01037.dict_array (34.5389,0.621937) 101 -dictGet test_01037.dict_array (34.5406,3.17145) 101 -dictGet test_01037.dict_array (34.5434,-0.56306) 101 -dictGet test_01037.dict_array (34.5449,3.13311) 101 -dictGet test_01037.dict_array (34.5491,2.31572) 101 -dictGet test_01037.dict_array (34.5539,2.94028) 101 -dictGet test_01037.dict_array (34.5546,-0.208825) 101 -dictGet test_01037.dict_array (34.5549,3.78486) 101 -dictGet test_01037.dict_array (34.5676,0.307148) 101 -dictGet test_01037.dict_array (34.5743,1.5217399999999999) 101 -dictGet test_01037.dict_array (34.5775,3.48046) 101 -dictGet test_01037.dict_array (34.5815,2.5243700000000002) 101 -dictGet test_01037.dict_array (34.5841,4.21191) 101 -dictGet test_01037.dict_array (34.5887,2.65083) 101 -dictGet test_01037.dict_array (34.5937,3.2143) 101 -dictGet test_01037.dict_array (34.6013,-1.0612) 101 -dictGet test_01037.dict_array (34.6089,1.36066) 101 -dictGet test_01037.dict_array (34.6103,3.40227) 101 -dictGet test_01037.dict_array (34.6128,1.92276) 101 -dictGet test_01037.dict_array (34.6175,2.43627) 101 -dictGet test_01037.dict_array (34.6209,3.43776) 101 -dictGet test_01037.dict_array (34.6234,2.60237) 101 -dictGet test_01037.dict_array (34.6275,3.52479) 101 -dictGet test_01037.dict_array (34.635,0.568558) 101 -dictGet test_01037.dict_array (34.6373,2.37692) 101 -dictGet test_01037.dict_array (34.6375,3.52234) 101 -dictGet test_01037.dict_array (34.6426,2.12397) 101 -dictGet test_01037.dict_array (34.6513,2.80915) 101 -dictGet test_01037.dict_array (34.6632,2.30039) 101 -dictGet test_01037.dict_array (34.6691,1.86582) 101 -dictGet test_01037.dict_array (34.6739,0.15342) 101 -dictGet test_01037.dict_array (34.6825,0.0499679) 101 -dictGet test_01037.dict_array (34.6893,0.454326) 101 -dictGet test_01037.dict_array (34.6957,-0.358598) 101 -dictGet test_01037.dict_array (34.6986,0.562679) 101 -dictGet test_01037.dict_array (34.712,1.12114) 101 -dictGet test_01037.dict_array (34.7126,-0.0057301) 101 -dictGet test_01037.dict_array (34.7137,0.0248501) 101 -dictGet test_01037.dict_array (34.7162,1.15623) 101 -dictGet test_01037.dict_array (34.7258,3.95142) 101 -dictGet test_01037.dict_array (34.7347,3.5232099999999997) 101 -dictGet test_01037.dict_array (34.7363,2.23374) 101 -dictGet test_01037.dict_array (34.7375,0.397841) 101 -dictGet test_01037.dict_array (34.7423,3.09198) 101 -dictGet test_01037.dict_array (34.7452,3.09029) 101 -dictGet test_01037.dict_array (34.7539,-1.06943) 101 -dictGet test_01037.dict_array (34.7733,-0.00912717) 101 -dictGet test_01037.dict_array (34.774,2.71088) 101 -dictGet test_01037.dict_array (34.7771,1.46009) 101 -dictGet test_01037.dict_array (34.7782,-1.28308) 101 -dictGet test_01037.dict_array (34.7924,3.63564) 101 -dictGet test_01037.dict_array (34.7939,-0.416676) 101 -dictGet test_01037.dict_array (34.7964,-0.401773) 101 -dictGet test_01037.dict_array (34.7974,0.0286873) 101 -dictGet test_01037.dict_array (34.7975,3.05965) 101 -dictGet test_01037.dict_array (34.8037,3.07263) 101 -dictGet test_01037.dict_array (34.8254,-0.390284) 101 -dictGet test_01037.dict_array (34.828,1.91869) 101 -dictGet test_01037.dict_array (34.8289,3.71058) 101 -dictGet test_01037.dict_array (34.8403,2.14606) 101 -dictGet test_01037.dict_array (34.8437,2.20617) 101 -dictGet test_01037.dict_array (34.8469,2.38435) 101 -dictGet test_01037.dict_array (34.86,1.45705) 101 -dictGet test_01037.dict_array (34.8612,0.914248) 101 -dictGet test_01037.dict_array (34.8663,3.4215400000000002) 101 -dictGet test_01037.dict_array (34.8724,-0.375144) 101 -dictGet test_01037.dict_array (34.8795,3.29317) 101 -dictGet test_01037.dict_array (34.8823,1.21988) 101 -dictGet test_01037.dict_array (34.8834,1.07657) 101 -dictGet test_01037.dict_array (34.8837,0.157648) 101 -dictGet test_01037.dict_array (34.8871,-0.9755) 101 -dictGet test_01037.dict_array (34.8871,1.8943699999999999) 101 -dictGet test_01037.dict_array (34.889,3.36756) 101 -dictGet test_01037.dict_array (34.8907,1.24874) 101 -dictGet test_01037.dict_array (34.8965,3.13508) 101 -dictGet test_01037.dict_array (34.9042,2.62092) 101 -dictGet test_01037.dict_array (34.9055,-0.0448967) 101 -dictGet test_01037.dict_array (34.9122,0.110576) 101 -dictGet test_01037.dict_array (34.9228,3.60183) 101 -dictGet test_01037.dict_array (34.9237,1.21715) 101 -dictGet test_01037.dict_array (34.9296,1.70459) 101 -dictGet test_01037.dict_array (34.941,-1.14663) 101 -dictGet test_01037.dict_array (34.9448,1.18923) 101 -dictGet test_01037.dict_array (34.9462,3.81678) 101 -dictGet test_01037.dict_array (34.9466,0.593463) 101 -dictGet test_01037.dict_array (34.9485,0.150307) 101 -dictGet test_01037.dict_array (34.9542,0.487238) 101 -dictGet test_01037.dict_array (34.9559,2.03473) 101 -dictGet test_01037.dict_array (34.9671,-0.960225) 101 -dictGet test_01037.dict_array (34.9711,2.63444) 101 -dictGet test_01037.dict_array (34.9892,0.354775) 101 -dictGet test_01037.dict_array (34.9907,1.40724) 101 -dictGet test_01037.dict_array (34.9916,-0.00173097) 101 -dictGet test_01037.dict_array (34.9919,2.06167) 101 +dictGet dict_array (29.5699,2.50068) 101 +dictGet dict_array (29.5796,1.55456) 101 +dictGet dict_array (29.5796,2.36864) 101 +dictGet dict_array (29.5844,1.59626) 101 +dictGet dict_array (29.5886,4.03321) 101 +dictGet dict_array (29.5914,3.02628) 101 +dictGet dict_array (29.5926,-0.0965169) 101 +dictGet dict_array (29.5968,2.37773) 101 +dictGet dict_array (29.5984,0.755853) 101 +dictGet dict_array (29.6066,3.47173) 101 +dictGet dict_array (29.6085,-1.26007) 101 +dictGet dict_array (29.6131,0.246565) 101 +dictGet dict_array (29.6157,-0.266687) 101 +dictGet dict_array (29.6164,2.94674) 101 +dictGet dict_array (29.6195,-0.591941) 101 +dictGet dict_array (29.6231,1.54818) 101 +dictGet dict_array (29.6379,0.764114) 101 +dictGet dict_array (29.6462,-0.772059) 934570 +dictGet dict_array (29.6579,-1.07336) 101 +dictGet dict_array (29.6618,-0.271842) 101 +dictGet dict_array (29.6629,-0.303602) 101 +dictGet dict_array (29.6659,-0.782823) 934570 +dictGet dict_array (29.6736,-0.113832) 101 +dictGet dict_array (29.6759,3.02905) 101 +dictGet dict_array (29.6778,3.71898) 101 +dictGet dict_array (29.6796,1.10433) 101 +dictGet dict_array (29.6809,2.13677) 101 +dictGet dict_array (29.6935,4.11894) 101 +dictGet dict_array (29.6991,-1.4458199999999999) 101 +dictGet dict_array (29.6997,3.17297) 101 +dictGet dict_array (29.7043,3.6145899999999997) 101 +dictGet dict_array (29.7065,3.24885) 101 +dictGet dict_array (29.7126,0.28108) 101 +dictGet dict_array (29.7192,0.174273) 101 +dictGet dict_array (29.7217,-0.523481) 934570 +dictGet dict_array (29.7271,1.67967) 101 +dictGet dict_array (29.7311,4.12444) 101 +dictGet dict_array (29.7347,1.88378) 101 +dictGet dict_array (29.7358,0.67944) 101 +dictGet dict_array (29.7366,-0.2973) 101 +dictGet dict_array (29.7446,0.646536) 101 +dictGet dict_array (29.7453,-0.567963) 101 +dictGet dict_array (29.764,4.04217) 101 +dictGet dict_array (29.7655,1.51372) 101 +dictGet dict_array (29.7744,1.12435) 101 +dictGet dict_array (29.7774,-0.0681196) 101 +dictGet dict_array (29.7784,1.54864) 101 +dictGet dict_array (29.7785,2.24139) 101 +dictGet dict_array (29.7922,0.220808) 101 +dictGet dict_array (29.7936,2.37709) 101 +dictGet dict_array (29.8008,0.948536) 101 +dictGet dict_array (29.8115,0.201227) 101 +dictGet dict_array (29.814,0.149601) 101 +dictGet dict_array (29.8193,-1.35858) 101 +dictGet dict_array (29.8201,0.965518) 101 +dictGet dict_array (29.8265,-0.727286) 101 +dictGet dict_array (29.8277,-0.531746) 101 +dictGet dict_array (29.8289,3.63009) 101 +dictGet dict_array (29.8548,0.838047) 101 +dictGet dict_array (29.8641,-0.845265) 101 +dictGet dict_array (29.8649,0.0562212) 101 +dictGet dict_array (29.8701,-1.02045) 101 +dictGet dict_array (29.8733,2.76654) 101 +dictGet dict_array (29.876,0.555475) 101 +dictGet dict_array (29.8794,-0.800108) 101 +dictGet dict_array (29.8813,2.7426399999999997) 101 +dictGet dict_array (29.897100000000002,2.66193) 101 +dictGet dict_array (29.908,4.01339) 101 +dictGet dict_array (29.9165,-1.08246) 101 +dictGet dict_array (29.9201,-0.420861) 101 +dictGet dict_array (29.9217,3.03778) 101 +dictGet dict_array (29.9355,0.773833) 101 +dictGet dict_array (29.947,3.76517) 101 +dictGet dict_array (29.9518,-0.60557) 101 +dictGet dict_array (29.9564,-0.600163) 101 +dictGet dict_array (29.959600000000002,4.16591) 101 +dictGet dict_array (29.9615,-1.33708) 101 +dictGet dict_array (29.9699,-0.392375) 101 +dictGet dict_array (29.9776,1.04552) 101 +dictGet dict_array (29.9784,4.02756) 101 +dictGet dict_array (29.9819,4.00597) 101 +dictGet dict_array (29.9826,1.2816100000000001) 101 +dictGet dict_array (30.0026,2.76257) 101 +dictGet dict_array (30.0126,3.68255) 101 +dictGet dict_array (30.0131,0.796576) 101 +dictGet dict_array (30.018,1.16523) 101 +dictGet dict_array (30.0261,-0.210653) 101 +dictGet dict_array (30.0472,-1.11007) 101 +dictGet dict_array (30.0542,-0.479585) 101 +dictGet dict_array (30.0613,1.6278000000000001) 101 +dictGet dict_array (30.0617,-0.0551152) 101 +dictGet dict_array (30.0637,2.62066) 101 +dictGet dict_array (30.0721,1.6424400000000001) 101 +dictGet dict_array (30.0769,-0.402636) 101 +dictGet dict_array (30.0791,-0.277435) 101 +dictGet dict_array (30.0931,0.0327512) 101 +dictGet dict_array (30.1059,3.52623) 101 +dictGet dict_array (30.1103,0.865466) 101 +dictGet dict_array (30.1115,2.95243) 101 +dictGet dict_array (30.1144,1.71029) 101 +dictGet dict_array (30.1311,-0.864751) 101 +dictGet dict_array (30.1336,-0.851386) 101 +dictGet dict_array (30.1393,3.89901) 101 +dictGet dict_array (30.1456,-0.531898) 101 +dictGet dict_array (30.1492,2.07833) 101 +dictGet dict_array (30.1575,2.43856) 101 +dictGet dict_array (30.1682,1.19771) 101 +dictGet dict_array (30.1716,3.9853300000000003) 101 +dictGet dict_array (30.1849,2.78374) 101 +dictGet dict_array (30.1866,0.65658) 101 +dictGet dict_array (30.1885,1.56943) 101 +dictGet dict_array (30.1959,-1.38202) 101 +dictGet dict_array (30.1999,1.58413) 101 +dictGet dict_array (30.2024,0.713081) 101 +dictGet dict_array (30.2054,0.620143) 101 +dictGet dict_array (30.2091,1.51641) 101 +dictGet dict_array (30.2124,-0.331782) 101 +dictGet dict_array (30.226,3.03527) 101 +dictGet dict_array (30.2261,3.18486) 101 +dictGet dict_array (30.2288,2.48407) 101 +dictGet dict_array (30.2345,3.7462400000000002) 101 +dictGet dict_array (30.2375,0.62046) 101 +dictGet dict_array (30.2425,-0.472914) 101 +dictGet dict_array (30.247,3.95863) 101 +dictGet dict_array (30.2494,-0.305093) 101 +dictGet dict_array (30.2499,2.54337) 101 +dictGet dict_array (30.2606,2.16644) 101 +dictGet dict_array (30.2672,3.94847) 101 +dictGet dict_array (30.2709,-0.136264) 101 +dictGet dict_array (30.2764,1.18654) 101 +dictGet dict_array (30.2765,1.20383) 101 +dictGet dict_array (30.2839,1.05762) 101 +dictGet dict_array (30.286,0.469327) 101 +dictGet dict_array (30.2927,3.1693) 101 +dictGet dict_array (30.2935,3.49854) 101 +dictGet dict_array (30.307,0.312338) 101 +dictGet dict_array (30.3085,1.07791) 101 +dictGet dict_array (30.3139,2.77248) 101 +dictGet dict_array (30.314,0.822823) 101 +dictGet dict_array (30.3227,-0.587351) 101 +dictGet dict_array (30.332,1.00174) 101 +dictGet dict_array (30.3388,0.844148) 101 +dictGet dict_array (30.3485,0.561902) 101 +dictGet dict_array (30.3497,0.180362) 101 +dictGet dict_array (30.361,4.13016) 101 +dictGet dict_array (30.3623,-0.0484027) 101 +dictGet dict_array (30.3638,3.9845800000000002) 101 +dictGet dict_array (30.3853,3.16051) 101 +dictGet dict_array (30.3974,2.6617800000000003) 101 +dictGet dict_array (30.4002,-1.15886) 101 +dictGet dict_array (30.4008,-0.387015) 101 +dictGet dict_array (30.4018,1.86493) 101 +dictGet dict_array (30.4239,1.16818) 101 +dictGet dict_array (30.4363,3.63938) 101 +dictGet dict_array (30.4377,-0.81315) 101 +dictGet dict_array (30.4391,3.54703) 101 +dictGet dict_array (30.4424,-1.39435) 101 +dictGet dict_array (30.4441,2.8463000000000003) 101 +dictGet dict_array (30.4517,3.28117) 101 +dictGet dict_array (30.4658,2.6928) 101 +dictGet dict_array (30.4734,2.66161) 101 +dictGet dict_array (30.4799,-1.07578) 101 +dictGet dict_array (30.4837,-1.02486) 101 +dictGet dict_array (30.485,1.06326) 101 +dictGet dict_array (30.495,1.12306) 101 +dictGet dict_array (30.501,2.27264) 101 +dictGet dict_array (30.5027,1.99382) 101 +dictGet dict_array (30.5194,-1.03943) 101 +dictGet dict_array (30.5239,1.04328) 101 +dictGet dict_array (30.528,3.82041) 101 +dictGet dict_array (30.5299,-0.715248) 101 +dictGet dict_array (30.5331,1.19603) 101 +dictGet dict_array (30.535800000000002,2.71485) 101 +dictGet dict_array (30.5405,0.804694) 101 +dictGet dict_array (30.542,1.23739) 101 +dictGet dict_array (30.5432,4.04189) 101 +dictGet dict_array (30.5457,-0.956121) 101 +dictGet dict_array (30.5506,3.07443) 101 +dictGet dict_array (30.5539,3.87084) 101 +dictGet dict_array (30.5578,3.78837) 101 +dictGet dict_array (30.5588,0.966135) 101 +dictGet dict_array (30.5637,2.5605) 101 +dictGet dict_array (30.5647,-1.27328) 101 +dictGet dict_array (30.5656,-0.0581332) 101 +dictGet dict_array (30.5715,0.65755) 101 +dictGet dict_array (30.5727,3.01604) 101 +dictGet dict_array (30.5729,-0.976857) 101 +dictGet dict_array (30.5751,0.60204) 101 +dictGet dict_array (30.5854,3.02473) 101 +dictGet dict_array (30.5866,0.174099) 101 +dictGet dict_array (30.5947,0.875193) 101 +dictGet dict_array (30.5992,-0.403901) 101 +dictGet dict_array (30.6002,4.18891) 101 +dictGet dict_array (30.6025,0.217712) 101 +dictGet dict_array (30.6054,0.927203) 101 +dictGet dict_array (30.6075,3.79359) 101 +dictGet dict_array (30.6159,3.82773) 101 +dictGet dict_array (30.627,3.84039) 101 +dictGet dict_array (30.6308,0.77517) 101 +dictGet dict_array (30.6338,0.179565) 101 +dictGet dict_array (30.6461,1.3293599999999999) 101 +dictGet dict_array (30.6674,-0.424547) 101 +dictGet dict_array (30.669,1.76539) 101 +dictGet dict_array (30.6788,4.01239) 101 +dictGet dict_array (30.6864,3.59158) 101 +dictGet dict_array (30.7049,-0.875413) 101 +dictGet dict_array (30.705,1.3307) 101 +dictGet dict_array (30.7063,-0.473192) 101 +dictGet dict_array (30.7075,-1.1958199999999999) 101 +dictGet dict_array (30.7101,-0.367562) 101 +dictGet dict_array (30.7203,2.98725) 101 +dictGet dict_array (30.7213,2.2745699999999998) 101 +dictGet dict_array (30.7446,-0.334144) 101 +dictGet dict_array (30.7468,3.82967) 101 +dictGet dict_array (30.747,-0.384779) 101 +dictGet dict_array (30.7681,0.904198) 101 +dictGet dict_array (30.7757,1.78743) 101 +dictGet dict_array (30.8021,-0.479212) 101 +dictGet dict_array (30.8079,-1.40869) 101 +dictGet dict_array (30.8206,-0.0608489) 101 +dictGet dict_array (30.8218,0.43909) 101 +dictGet dict_array (30.8239,0.10014) 101 +dictGet dict_array (30.8282,4.15409) 101 +dictGet dict_array (30.8288,-0.709528) 101 +dictGet dict_array (30.8326,0.156011) 101 +dictGet dict_array (30.8328,-1.03704) 101 +dictGet dict_array (30.839,2.15528) 101 +dictGet dict_array (30.8452,0.219377) 101 +dictGet dict_array (30.8463,0.0515355) 101 +dictGet dict_array (30.8526,2.06614) 101 +dictGet dict_array (30.8566,0.517876) 101 +dictGet dict_array (30.8588,-1.31738) 101 +dictGet dict_array (30.8681,0.44207) 101 +dictGet dict_array (30.8914,1.0072) 101 +dictGet dict_array (30.897,0.483425) 101 +dictGet dict_array (30.905,2.8731999999999998) 101 +dictGet dict_array (30.9051,2.21956) 101 +dictGet dict_array (30.9115,4.00663) 101 +dictGet dict_array (30.9167,-0.834462) 101 +dictGet dict_array (30.9252,-1.3289900000000001) 101 +dictGet dict_array (30.9314,1.85384) 101 +dictGet dict_array (30.9392,2.53236) 101 +dictGet dict_array (30.9569,2.82038) 101 +dictGet dict_array (30.9598,-0.641011) 101 +dictGet dict_array (30.9601,-0.254928) 101 +dictGet dict_array (30.9623,-1.3886) 101 +dictGet dict_array (30.9707,0.888854) 101 +dictGet dict_array (30.9766,2.81957) 101 +dictGet dict_array (30.9775,2.69273) 101 +dictGet dict_array (30.9821,0.587715) 101 +dictGet dict_array (30.9887,4.0233) 101 +dictGet dict_array (30.9914,0.259542) 101 +dictGet dict_array (30.9986,-1.36832) 101 +dictGet dict_array (31.008,0.628999) 101 +dictGet dict_array (31.0168,-1.17462) 101 +dictGet dict_array (31.0237,3.52547) 101 +dictGet dict_array (31.0306,3.78522) 101 +dictGet dict_array (31.0308,-0.72453) 101 +dictGet dict_array (31.0463,2.41997) 101 +dictGet dict_array (31.047,0.624184) 101 +dictGet dict_array (31.0569,0.0706393) 5994232 +dictGet dict_array (31.0583,1.3244099999999999) 101 +dictGet dict_array (31.063,3.23861) 101 +dictGet dict_array (31.068,0.695575) 101 +dictGet dict_array (31.0687,1.85675) 101 +dictGet dict_array (31.0692,0.254793) 101 +dictGet dict_array (31.0766,0.828128) 101 +dictGet dict_array (31.0833,0.0612782) 5994232 +dictGet dict_array (31.0833,2.59748) 101 +dictGet dict_array (31.0861,-1.3778299999999999) 101 +dictGet dict_array (31.0874,3.07258) 101 +dictGet dict_array (31.0882,1.4882) 101 +dictGet dict_array (31.0924,3.42242) 101 +dictGet dict_array (31.0927,2.67448) 101 +dictGet dict_array (31.0936,1.12292) 101 +dictGet dict_array (31.0952,-0.336928) 101 +dictGet dict_array (31.0978,3.48482) 101 +dictGet dict_array (31.1107,3.7513199999999998) 101 +dictGet dict_array (31.1156,1.19171) 101 +dictGet dict_array (31.1176,0.223509) 5994232 +dictGet dict_array (31.1249,0.946838) 101 +dictGet dict_array (31.1267,1.48983) 101 +dictGet dict_array (31.138,-0.289981) 101 +dictGet dict_array (31.1382,3.02904) 101 +dictGet dict_array (31.1475,2.6178) 101 +dictGet dict_array (31.1491,1.37873) 101 +dictGet dict_array (31.1525,3.72105) 101 +dictGet dict_array (31.1526,-1.4129800000000001) 101 +dictGet dict_array (31.1526,-0.186457) 101 +dictGet dict_array (31.1539,2.78789) 101 +dictGet dict_array (31.1548,-1.08552) 101 +dictGet dict_array (31.1567,-0.0768925) 101 +dictGet dict_array (31.1613,1.49617) 101 +dictGet dict_array (31.1653,1.03777) 101 +dictGet dict_array (31.1662,3.4214700000000002) 101 +dictGet dict_array (31.1672,-0.0813169) 101 +dictGet dict_array (31.177,0.440843) 101 +dictGet dict_array (31.1788,-0.737151) 101 +dictGet dict_array (31.1856,-0.144396) 101 +dictGet dict_array (31.1959,3.66813) 101 +dictGet dict_array (31.1996,-0.353983) 101 +dictGet dict_array (31.2019,2.86802) 101 +dictGet dict_array (31.2087,2.31245) 101 +dictGet dict_array (31.2125,3.2713200000000002) 101 +dictGet dict_array (31.2137,-0.108129) 101 +dictGet dict_array (31.216,3.9156) 101 +dictGet dict_array (31.2201,-0.202141) 101 +dictGet dict_array (31.2285,2.09058) 101 +dictGet dict_array (31.2502,4.01526) 101 +dictGet dict_array (31.2585,3.11524) 101 +dictGet dict_array (31.2645,-0.620418) 101 +dictGet dict_array (31.2684,2.74277) 101 +dictGet dict_array (31.2821,-1.12772) 101 +dictGet dict_array (31.2821,2.46769) 101 +dictGet dict_array (31.2887,3.91396) 101 +dictGet dict_array (31.295,1.49942) 101 +dictGet dict_array (31.2997,3.46122) 101 +dictGet dict_array (31.3017,3.3263) 101 +dictGet dict_array (31.3022,3.16754) 101 +dictGet dict_array (31.3048,0.364962) 101 +dictGet dict_array (31.305,3.1967) 101 +dictGet dict_array (31.3061,1.84303) 101 +dictGet dict_array (31.3082,-0.173851) 101 +dictGet dict_array (31.3315,3.90932) 101 +dictGet dict_array (31.3351,2.80164) 101 +dictGet dict_array (31.3388,0.168765) 5994233 +dictGet dict_array (31.339,0.25535) 101 +dictGet dict_array (31.3423,1.7036799999999999) 101 +dictGet dict_array (31.349,0.386456) 101 +dictGet dict_array (31.3558,-1.04336) 101 +dictGet dict_array (31.3564,0.478876) 101 +dictGet dict_array (31.3607,-0.0860507) 5994233 +dictGet dict_array (31.3831,3.84469) 101 +dictGet dict_array (31.3886,-0.731137) 101 +dictGet dict_array (31.4043,-0.348907) 101 +dictGet dict_array (31.4081,1.47391) 101 +dictGet dict_array (31.4176,-0.583645) 101 +dictGet dict_array (31.4177,1.36972) 101 +dictGet dict_array (31.4182,0.958303) 101 +dictGet dict_array (31.4199,3.1738) 101 +dictGet dict_array (31.4221,2.74876) 101 +dictGet dict_array (31.4301,-0.122643) 5994233 +dictGet dict_array (31.4344,1.00661) 101 +dictGet dict_array (31.4375,4.20304) 101 +dictGet dict_array (31.4377,0.289608) 101 +dictGet dict_array (31.4379,0.54744) 101 +dictGet dict_array (31.4459,3.94945) 101 +dictGet dict_array (31.4559,-0.345063) 101 +dictGet dict_array (31.464,0.726129) 101 +dictGet dict_array (31.4662,-0.299019) 5994233 +dictGet dict_array (31.4671,1.9605299999999999) 101 +dictGet dict_array (31.4673,-0.403676) 101 +dictGet dict_array (31.4712,-0.237941) 5994233 +dictGet dict_array (31.4816,0.120264) 5994233 +dictGet dict_array (31.4875,0.323483) 101 +dictGet dict_array (31.490099999999998,-0.338163) 101 +dictGet dict_array (31.4932,0.517674) 101 +dictGet dict_array (31.5112,1.9689299999999998) 101 +dictGet dict_array (31.5122,2.92785) 101 +dictGet dict_array (31.5151,0.166429) 101 +dictGet dict_array (31.5174,2.94802) 101 +dictGet dict_array (31.5182,4.18776) 101 +dictGet dict_array (31.5238,1.18793) 101 +dictGet dict_array (31.5271,3.07446) 101 +dictGet dict_array (31.5393,1.58061) 101 +dictGet dict_array (31.5421,3.13711) 101 +dictGet dict_array (31.5479,2.39897) 101 +dictGet dict_array (31.5519,0.99285) 101 +dictGet dict_array (31.5685,3.47987) 101 +dictGet dict_array (31.5959,0.437382) 101 +dictGet dict_array (31.6003,0.194376) 101 +dictGet dict_array (31.6026,2.15457) 101 +dictGet dict_array (31.606,2.45365) 101 +dictGet dict_array (31.6062,-0.453441) 101 +dictGet dict_array (31.6107,1.35247) 101 +dictGet dict_array (31.6155,3.85588) 101 +dictGet dict_array (31.6222,2.03326) 101 +dictGet dict_array (31.6231,-0.123059) 101 +dictGet dict_array (31.6244,1.6885599999999998) 101 +dictGet dict_array (31.6459,0.669716) 101 +dictGet dict_array (31.6563,-0.0644741) 101 +dictGet dict_array (31.6618,-0.551121) 101 +dictGet dict_array (31.6725,-0.38922) 101 +dictGet dict_array (31.6727,4.10336) 101 +dictGet dict_array (31.6739,4.1391) 101 +dictGet dict_array (31.6897,2.8694699999999997) 101 +dictGet dict_array (31.6902,3.98792) 101 +dictGet dict_array (31.6945,2.46687) 101 +dictGet dict_array (31.6987,-1.3796) 101 +dictGet dict_array (31.7012,2.34845) 101 +dictGet dict_array (31.7036,0.0228348) 101 +dictGet dict_array (31.7046,3.68111) 101 +dictGet dict_array (31.7055,2.92556) 101 +dictGet dict_array (31.7102,1.04532) 101 +dictGet dict_array (31.7149,-0.443302) 101 +dictGet dict_array (31.7195,2.99311) 101 +dictGet dict_array (31.7274,0.166719) 101 +dictGet dict_array (31.7565,-0.565382) 101 +dictGet dict_array (31.7615,0.771626) 101 +dictGet dict_array (31.7739,1.8970099999999999) 101 +dictGet dict_array (31.7848,1.2623199999999999) 101 +dictGet dict_array (31.7912,-0.788599) 101 +dictGet dict_array (31.8011,2.65853) 101 +dictGet dict_array (31.8032,-0.0590108) 101 +dictGet dict_array (31.8038,1.9618799999999998) 101 +dictGet dict_array (31.8098,-1.46851) 101 +dictGet dict_array (31.8131,3.41982) 101 +dictGet dict_array (31.8169,3.31059) 101 +dictGet dict_array (31.8202,-0.193692) 101 +dictGet dict_array (31.8306,1.57586) 101 +dictGet dict_array (31.8382,-0.787948) 101 +dictGet dict_array (31.8433,2.49692) 101 +dictGet dict_array (31.8436,2.41851) 101 +dictGet dict_array (31.8563,-1.10787) 101 +dictGet dict_array (31.8683,0.996504) 101 +dictGet dict_array (31.8693,-0.828142) 101 +dictGet dict_array (31.8723,1.08929) 101 +dictGet dict_array (31.8737,0.881127) 101 +dictGet dict_array (31.8881,-0.58441) 101 +dictGet dict_array (31.9011,0.121349) 101 +dictGet dict_array (31.9066,2.13045) 101 +dictGet dict_array (31.9142,1.03368) 101 +dictGet dict_array (31.9155,3.38363) 101 +dictGet dict_array (31.9168,1.3166) 101 +dictGet dict_array (31.9185,-1.11879) 101 +dictGet dict_array (31.9186,-0.647948) 101 +dictGet dict_array (31.9311,3.96928) 101 +dictGet dict_array (31.9335,1.47048) 101 +dictGet dict_array (31.9443,-1.36175) 101 +dictGet dict_array (31.9481,2.34231) 101 +dictGet dict_array (31.9526,1.36565) 101 +dictGet dict_array (31.9629,2.5208399999999997) 101 +dictGet dict_array (31.9765,0.975783) 101 +dictGet dict_array (31.9923,3.31773) 101 +dictGet dict_array (31.9994,0.972816) 101 +dictGet dict_array (32.001,3.47425) 101 +dictGet dict_array (32.0127,2.13874) 101 +dictGet dict_array (32.0244,3.2092) 101 +dictGet dict_array (32.029,1.18039) 101 +dictGet dict_array (32.0315,0.566073) 101 +dictGet dict_array (32.0354,1.0766499999999999) 101 +dictGet dict_array (32.0399,-1.11576) 101 +dictGet dict_array (32.053,2.16849) 101 +dictGet dict_array (32.0542,0.042328) 101 +dictGet dict_array (32.0576,2.47001) 101 +dictGet dict_array (32.061,3.7498899999999997) 101 +dictGet dict_array (32.0623,1.25134) 101 +dictGet dict_array (32.0626,1.9611399999999999) 101 +dictGet dict_array (32.0666,-0.0904247) 101 +dictGet dict_array (32.0681,2.28442) 101 +dictGet dict_array (32.0692,1.50869) 101 +dictGet dict_array (32.0724,4.03314) 101 +dictGet dict_array (32.0729,-0.064324) 101 +dictGet dict_array (32.079,0.293758) 101 +dictGet dict_array (32.0847,-1.19814) 101 +dictGet dict_array (32.0974,-0.91927) 101 +dictGet dict_array (32.0979,-0.736979) 101 +dictGet dict_array (32.106,-1.33063) 101 +dictGet dict_array (32.1189,0.246715) 101 +dictGet dict_array (32.1207,4.00883) 101 +dictGet dict_array (32.1396,1.12402) 101 +dictGet dict_array (32.1413,1.5668) 101 +dictGet dict_array (32.143,1.35559) 101 +dictGet dict_array (32.1538,1.32881) 101 +dictGet dict_array (32.1549,4.06552) 101 +dictGet dict_array (32.1555,-0.79275) 101 +dictGet dict_array (32.163,1.17733) 101 +dictGet dict_array (32.1634,2.94273) 101 +dictGet dict_array (32.1644,1.85666) 101 +dictGet dict_array (32.1745,0.435458) 101 +dictGet dict_array (32.1765,1.65149) 101 +dictGet dict_array (32.1893,2.08924) 101 +dictGet dict_array (32.2024,0.222191) 101 +dictGet dict_array (32.2107,1.34379) 101 +dictGet dict_array (32.2109,3.9018699999999997) 101 +dictGet dict_array (32.2123,1.85233) 101 +dictGet dict_array (32.2144,3.72534) 101 +dictGet dict_array (32.2218,2.5386699999999998) 101 +dictGet dict_array (32.2279,2.84267) 101 +dictGet dict_array (32.2345,3.33295) 101 +dictGet dict_array (32.2435,3.85283) 101 +dictGet dict_array (32.2527,-0.480608) 101 +dictGet dict_array (32.2566,-0.837882) 101 +dictGet dict_array (32.2627,2.57708) 101 +dictGet dict_array (32.2733,0.244931) 101 +dictGet dict_array (32.2761,4.05808) 101 +dictGet dict_array (32.2764,3.78472) 101 +dictGet dict_array (32.2814,-1.26011) 101 +dictGet dict_array (32.2861,3.02427) 101 +dictGet dict_array (32.2924,0.928609) 101 +dictGet dict_array (32.2963,-0.78543) 101 +dictGet dict_array (32.3039,3.21175) 101 +dictGet dict_array (32.3107,0.698287) 101 +dictGet dict_array (32.3138,0.0595677) 101 +dictGet dict_array (32.3339,0.707056) 101 +dictGet dict_array (32.3351,0.415474) 101 +dictGet dict_array (32.342,-0.681023) 101 +dictGet dict_array (32.3463,1.83196) 101 +dictGet dict_array (32.3494,2.43799) 101 +dictGet dict_array (32.3524,3.47049) 101 +dictGet dict_array (32.3531,2.33115) 101 +dictGet dict_array (32.3602,0.116106) 101 +dictGet dict_array (32.3612,1.1598) 101 +dictGet dict_array (32.3689,3.34847) 101 +dictGet dict_array (32.3695,0.734055) 101 +dictGet dict_array (32.3825,3.85017) 101 +dictGet dict_array (32.3835,-1.25491) 101 +dictGet dict_array (32.4018,-0.728568) 101 +dictGet dict_array (32.4044,2.96727) 101 +dictGet dict_array (32.4101,2.9988) 101 +dictGet dict_array (32.417,-1.12908) 101 +dictGet dict_array (32.4172,4.1952) 101 +dictGet dict_array (32.4239,2.49512) 101 +dictGet dict_array (32.4258,4.05137) 101 +dictGet dict_array (32.4264,-0.427357) 101 +dictGet dict_array (32.4274,3.59377) 101 +dictGet dict_array (32.4286,-1.24757) 101 +dictGet dict_array (32.4294,3.0665) 101 +dictGet dict_array (32.4333,-0.353347) 101 +dictGet dict_array (32.4391,3.64421) 101 +dictGet dict_array (32.4401,3.70635) 101 +dictGet dict_array (32.45,1.68918) 101 +dictGet dict_array (32.4507,-0.133471) 101 +dictGet dict_array (32.4592,0.976458) 101 +dictGet dict_array (32.4595,1.89135) 101 +dictGet dict_array (32.4604,0.280248) 101 +dictGet dict_array (32.4835,0.472731) 101 +dictGet dict_array (32.4855,2.01938) 101 +dictGet dict_array (32.4872,2.01697) 101 +dictGet dict_array (32.4911,0.613106) 101 +dictGet dict_array (32.4918,2.17834) 101 +dictGet dict_array (32.4947,2.34595) 101 +dictGet dict_array (32.5035,2.92234) 101 +dictGet dict_array (32.5132,-0.331206) 101 +dictGet dict_array (32.5156,-0.412604) 7652581 +dictGet dict_array (32.5158,2.9067499999999997) 101 +dictGet dict_array (32.5249,2.44519) 101 +dictGet dict_array (32.5293,-0.790952) 101 +dictGet dict_array (32.5319,3.96854) 101 +dictGet dict_array (32.5518,3.6093) 101 +dictGet dict_array (32.5541,3.5225400000000002) 101 +dictGet dict_array (32.5569,0.816123) 101 +dictGet dict_array (32.5646,1.9775) 101 +dictGet dict_array (32.5733,3.81271) 101 +dictGet dict_array (32.5767,0.948327) 101 +dictGet dict_array (32.5971,1.76179) 101 +dictGet dict_array (32.6035,-0.716157) 101 +dictGet dict_array (32.6087,4.21614) 101 +dictGet dict_array (32.6171,0.024481) 101 +dictGet dict_array (32.6189,-0.775391) 101 +dictGet dict_array (32.6198,2.92081) 101 +dictGet dict_array (32.621,-0.970784) 101 +dictGet dict_array (32.6266,0.650009) 101 +dictGet dict_array (32.6315,2.15144) 101 +dictGet dict_array (32.6385,-0.436803) 101 +dictGet dict_array (32.6449,-0.191292) 101 +dictGet dict_array (32.6535,2.10385) 101 +dictGet dict_array (32.6592,3.49973) 101 +dictGet dict_array (32.6598,2.5980600000000003) 101 +dictGet dict_array (32.6612,2.95681) 101 +dictGet dict_array (32.6636,-0.57235) 101 +dictGet dict_array (32.669,-0.382702) 101 +dictGet dict_array (32.6752,1.30748) 101 +dictGet dict_array (32.6811,2.9559800000000003) 101 +dictGet dict_array (32.6821,0.57336) 101 +dictGet dict_array (32.6828,3.91304) 101 +dictGet dict_array (32.6979,3.96868) 101 +dictGet dict_array (32.6983,3.15784) 101 +dictGet dict_array (32.7122,0.794293) 101 +dictGet dict_array (32.7131,-0.847256) 101 +dictGet dict_array (32.7219,0.883461) 101 +dictGet dict_array (32.7228,1.78808) 101 +dictGet dict_array (32.7273,-0.206908) 101 +dictGet dict_array (32.7292,0.259331) 101 +dictGet dict_array (32.7304,-1.38317) 101 +dictGet dict_array (32.7353,1.01601) 101 +dictGet dict_array (32.7354,4.17574) 101 +dictGet dict_array (32.7357,-0.190194) 101 +dictGet dict_array (32.7465,-1.37598) 101 +dictGet dict_array (32.7494,-0.275675) 101 +dictGet dict_array (32.7514,0.128951) 101 +dictGet dict_array (32.753,3.44207) 101 +dictGet dict_array (32.7686,2.11713) 101 +dictGet dict_array (32.7694,1.47159) 101 +dictGet dict_array (32.7768,0.0401042) 101 +dictGet dict_array (32.781,-1.34283) 101 +dictGet dict_array (32.7814,1.73876) 101 +dictGet dict_array (32.7856,-1.06363) 101 +dictGet dict_array (32.792699999999996,-1.1255600000000001) 101 +dictGet dict_array (32.7941,-0.645447) 101 +dictGet dict_array (32.7946,1.48889) 101 +dictGet dict_array (32.797,0.791753) 101 +dictGet dict_array (32.7982,-0.537798) 101 +dictGet dict_array (32.8091,2.3611) 101 +dictGet dict_array (32.81,1.7130800000000002) 101 +dictGet dict_array (32.8174,-0.288322) 101 +dictGet dict_array (32.823,1.6546699999999999) 101 +dictGet dict_array (32.8233,1.62108) 101 +dictGet dict_array (32.8428,-0.400045) 101 +dictGet dict_array (32.8479,2.13598) 101 +dictGet dict_array (32.8524,0.199902) 101 +dictGet dict_array (32.8543,3.23553) 101 +dictGet dict_array (32.8562,1.31371) 101 +dictGet dict_array (32.87,1.44256) 101 +dictGet dict_array (32.8789,2.38192) 101 +dictGet dict_array (32.8812,2.20734) 5999168 +dictGet dict_array (32.8815,-0.54427) 101 +dictGet dict_array (32.8853,2.4859) 5999168 +dictGet dict_array (32.8909,0.513964) 101 +dictGet dict_array (32.9035,2.38999) 101 +dictGet dict_array (32.9097,2.48131) 5999168 +dictGet dict_array (32.928,-0.943269) 101 +dictGet dict_array (32.9322,1.13165) 101 +dictGet dict_array (32.9348,1.22606) 101 +dictGet dict_array (32.9417,3.77998) 101 +dictGet dict_array (32.9428,3.11936) 101 +dictGet dict_array (32.9482,1.18092) 101 +dictGet dict_array (32.9506,0.0609364) 101 +dictGet dict_array (32.953,-0.828308) 101 +dictGet dict_array (32.9593,3.5209099999999998) 101 +dictGet dict_array (32.9617,2.07711) 5999168 +dictGet dict_array (32.966,0.693749) 101 +dictGet dict_array (32.9668,-0.716432) 101 +dictGet dict_array (32.9702,1.98555) 101 +dictGet dict_array (32.9782,1.73819) 101 +dictGet dict_array (32.9805,3.71151) 101 +dictGet dict_array (32.9821,2.97225) 101 +dictGet dict_array (32.995,-0.830301) 101 +dictGet dict_array (33.0234,0.770848) 101 +dictGet dict_array (33.0312,-0.340964) 101 +dictGet dict_array (33.0366,-0.756795) 101 +dictGet dict_array (33.0438,0.812871) 101 +dictGet dict_array (33.0455,1.84843) 101 +dictGet dict_array (33.0498,0.0913292) 101 +dictGet dict_array (33.0506,1.53739) 101 +dictGet dict_array (33.0554,2.4265) 101 +dictGet dict_array (33.0741,3.61332) 101 +dictGet dict_array (33.0765,-0.179985) 101 +dictGet dict_array (33.087,1.46465) 101 +dictGet dict_array (33.0906,-0.620383) 101 +dictGet dict_array (33.1047,-1.28027) 101 +dictGet dict_array (33.1072,1.96303) 101 +dictGet dict_array (33.1081,-0.897874) 101 +dictGet dict_array (33.1122,1.8950200000000001) 101 +dictGet dict_array (33.1237,2.63993) 101 +dictGet dict_array (33.1238,0.753963) 101 +dictGet dict_array (33.1257,0.495668) 101 +dictGet dict_array (33.1258,1.78341) 101 +dictGet dict_array (33.127,2.59646) 101 +dictGet dict_array (33.1324,-1.23742) 101 +dictGet dict_array (33.1359,3.83491) 101 +dictGet dict_array (33.1628,-0.379588) 101 +dictGet dict_array (33.1679,1.25601) 101 +dictGet dict_array (33.1688,-1.35553) 101 +dictGet dict_array (33.181,2.10943) 101 +dictGet dict_array (33.1871,2.81171) 101 +dictGet dict_array (33.1877,0.771297) 101 +dictGet dict_array (33.1883,-0.204797) 101 +dictGet dict_array (33.1886,3.27998) 101 +dictGet dict_array (33.1955,0.708907) 101 +dictGet dict_array (33.2044,-0.769275) 101 +dictGet dict_array (33.2182,3.36103) 101 +dictGet dict_array (33.2192,3.43586) 101 +dictGet dict_array (33.2322,-0.916753) 101 +dictGet dict_array (33.2359,-0.81321) 101 +dictGet dict_array (33.238,0.635072) 101 +dictGet dict_array (33.2398,3.02588) 101 +dictGet dict_array (33.2469,2.35698) 101 +dictGet dict_array (33.247,2.3327) 101 +dictGet dict_array (33.2579,2.8027100000000003) 101 +dictGet dict_array (33.2607,0.321082) 101 +dictGet dict_array (33.2653,0.243336) 101 +dictGet dict_array (33.2758,0.831836) 101 +dictGet dict_array (33.2771,0.886536) 101 +dictGet dict_array (33.2914,1.16026) 101 +dictGet dict_array (33.2914,1.38882) 101 +dictGet dict_array (33.2982,-1.16604) 101 +dictGet dict_array (33.2985,0.842556) 101 +dictGet dict_array (33.3005,2.8338900000000002) 101 +dictGet dict_array (33.305,0.0969475) 101 +dictGet dict_array (33.3072,3.82163) 101 +dictGet dict_array (33.312,3.41475) 101 +dictGet dict_array (33.3129,2.46048) 101 +dictGet dict_array (33.3134,3.46863) 101 +dictGet dict_array (33.3203,2.33139) 101 +dictGet dict_array (33.324,0.433701) 101 +dictGet dict_array (33.3338,2.44705) 101 +dictGet dict_array (33.337,4.06475) 101 +dictGet dict_array (33.3469,1.08172) 101 +dictGet dict_array (33.3538,0.717896) 101 +dictGet dict_array (33.3618,1.37899) 101 +dictGet dict_array (33.3698,0.547744) 101 +dictGet dict_array (33.3705,0.957619) 101 +dictGet dict_array (33.3821,3.07258) 101 +dictGet dict_array (33.3881,3.0626) 101 +dictGet dict_array (33.393,-0.816186) 101 +dictGet dict_array (33.3945,0.869508) 101 +dictGet dict_array (33.4001,1.24186) 101 +dictGet dict_array (33.4008,2.34911) 101 +dictGet dict_array (33.4166,-1.2808899999999999) 101 +dictGet dict_array (33.4167,3.0655) 101 +dictGet dict_array (33.4204,2.81887) 101 +dictGet dict_array (33.4211,1.71128) 101 +dictGet dict_array (33.4237,2.91761) 101 +dictGet dict_array (33.4266,1.5955599999999999) 101 +dictGet dict_array (33.4353,-0.391392) 101 +dictGet dict_array (33.4362,-0.134658) 101 +dictGet dict_array (33.4386,0.15396) 101 +dictGet dict_array (33.4421,-0.50712) 101 +dictGet dict_array (33.452,0.915829) 101 +dictGet dict_array (33.463,-0.0882717) 101 +dictGet dict_array (33.464,-1.00949) 101 +dictGet dict_array (33.4692,0.954092) 101 +dictGet dict_array (33.4716,1.9538799999999998) 101 +dictGet dict_array (33.4756,1.85836) 101 +dictGet dict_array (33.4859,4.0751) 101 +dictGet dict_array (33.4899,3.54193) 101 +dictGet dict_array (33.4935,3.49794) 101 +dictGet dict_array (33.494,-0.983356) 101 +dictGet dict_array (33.4955,-1.28128) 101 +dictGet dict_array (33.4965,-0.278687) 101 +dictGet dict_array (33.4991,0.647491) 101 +dictGet dict_array (33.5076,2.2272) 101 +dictGet dict_array (33.5079,-0.498199) 101 +dictGet dict_array (33.5157,0.535034) 101 +dictGet dict_array (33.5171,2.49677) 101 +dictGet dict_array (33.5255,2.4447200000000002) 101 +dictGet dict_array (33.526,4.01194) 101 +dictGet dict_array (33.5288,0.789434) 101 +dictGet dict_array (33.5356,-1.17671) 101 +dictGet dict_array (33.5402,1.49152) 101 +dictGet dict_array (33.5418,3.45757) 101 +dictGet dict_array (33.5428,1.90712) 101 +dictGet dict_array (33.5556,-0.55741) 101 +dictGet dict_array (33.5564,0.876858) 101 +dictGet dict_array (33.5567,-0.10208) 101 +dictGet dict_array (33.5645,-0.124824) 101 +dictGet dict_array (33.5663,3.4872) 101 +dictGet dict_array (33.5716,-0.0107611) 101 +dictGet dict_array (33.578,3.55714) 101 +dictGet dict_array (33.5826,-0.49076) 101 +dictGet dict_array (33.5909,0.773737) 101 +dictGet dict_array (33.5958,2.9619999999999997) 5994231 +dictGet dict_array (33.6193,-0.919755) 101 +dictGet dict_array (33.6313,0.652132) 101 +dictGet dict_array (33.632,0.823351) 101 +dictGet dict_array (33.66,2.18998) 101 +dictGet dict_array (33.6621,0.535395) 101 +dictGet dict_array (33.6726,3.19367) 101 +dictGet dict_array (33.6912,1.74522) 101 +dictGet dict_array (33.705,0.706397) 101 +dictGet dict_array (33.7076,0.7622) 101 +dictGet dict_array (33.7112,1.70187) 101 +dictGet dict_array (33.7246,-1.14837) 101 +dictGet dict_array (33.7326,2.62413) 5994231 +dictGet dict_array (33.7332,2.82137) 5994231 +dictGet dict_array (33.7434,0.394672) 101 +dictGet dict_array (33.7443,1.54557) 101 +dictGet dict_array (33.7506,1.57317) 101 +dictGet dict_array (33.7526,1.8578999999999999) 101 +dictGet dict_array (33.766,4.15013) 101 +dictGet dict_array (33.7834,2.41789) 101 +dictGet dict_array (33.7864,0.230935) 101 +dictGet dict_array (33.7965,3.05709) 101 +dictGet dict_array (33.7998,3.32881) 101 +dictGet dict_array (33.8003,2.97338) 5994231 +dictGet dict_array (33.8007,-1.08962) 101 +dictGet dict_array (33.8022,-0.139488) 101 +dictGet dict_array (33.8065,2.70857) 5994231 +dictGet dict_array (33.8169,-0.607788) 101 +dictGet dict_array (33.8203,0.108512) 101 +dictGet dict_array (33.8231,-1.03449) 101 +dictGet dict_array (33.8312,3.49458) 101 +dictGet dict_array (33.8342,0.297518) 101 +dictGet dict_array (33.8352,0.165872) 101 +dictGet dict_array (33.8354,1.87277) 101 +dictGet dict_array (33.8371,1.60103) 101 +dictGet dict_array (33.8387,1.9968) 101 +dictGet dict_array (33.8403,3.5805) 101 +dictGet dict_array (33.8414,-0.703067) 101 +dictGet dict_array (33.844,-0.179472) 101 +dictGet dict_array (33.8468,3.40137) 101 +dictGet dict_array (33.8509,4.15334) 101 +dictGet dict_array (33.8539,2.38339) 101 +dictGet dict_array (33.858,-1.3122500000000001) 101 +dictGet dict_array (33.859,3.72626) 101 +dictGet dict_array (33.8616,2.24433) 101 +dictGet dict_array (33.8621,3.01035) 101 +dictGet dict_array (33.8623,1.17559) 101 +dictGet dict_array (33.8682,2.706) 5994231 +dictGet dict_array (33.8684,0.189231) 101 +dictGet dict_array (33.872,1.93574) 101 +dictGet dict_array (33.8844,3.80404) 101 +dictGet dict_array (33.8888,0.594884) 101 +dictGet dict_array (33.8946,2.74161) 101 +dictGet dict_array (33.9023,0.6239) 101 +dictGet dict_array (33.9057,0.873222) 101 +dictGet dict_array (33.9157,-1.26607) 101 +dictGet dict_array (33.92,2.06848) 101 +dictGet dict_array (33.9298,-0.00526229) 101 +dictGet dict_array (33.932,3.07063) 101 +dictGet dict_array (33.9322,0.629385) 101 +dictGet dict_array (33.9367,-1.41955) 101 +dictGet dict_array (33.937,1.42532) 101 +dictGet dict_array (33.9375,1.1467100000000001) 101 +dictGet dict_array (33.9434,-1.05739) 101 +dictGet dict_array (33.9477,3.34809) 101 +dictGet dict_array (33.95,2.21715) 101 +dictGet dict_array (33.955799999999996,0.305176) 101 +dictGet dict_array (33.9686,-0.28273) 101 +dictGet dict_array (33.9703,4.1255) 101 +dictGet dict_array (33.9707,3.08199) 101 +dictGet dict_array (33.9754,1.06203) 101 +dictGet dict_array (33.9757,3.72468) 101 +dictGet dict_array (33.9775,-0.0440599) 101 +dictGet dict_array (33.9777,-0.251484) 101 +dictGet dict_array (33.9789,-0.339374) 101 +dictGet dict_array (33.9849,2.54515) 5994231 +dictGet dict_array (33.9885,-0.318557) 101 +dictGet dict_array (33.9977,1.07175) 101 +dictGet dict_array (33.9984,-0.700517) 101 +dictGet dict_array (34.0149,3.53338) 101 +dictGet dict_array (34.0173,3.39155) 101 +dictGet dict_array (34.0317,3.9579) 101 +dictGet dict_array (34.0369,3.83612) 101 +dictGet dict_array (34.043,-0.0887221) 101 +dictGet dict_array (34.0487,1.14252) 101 +dictGet dict_array (34.052,1.74832) 101 +dictGet dict_array (34.0711,-0.898071) 101 +dictGet dict_array (34.0747,1.55057) 101 +dictGet dict_array (34.0803,3.16763) 101 +dictGet dict_array (34.0872,3.75555) 101 +dictGet dict_array (34.0965,1.62038) 101 +dictGet dict_array (34.0977,-0.412691) 101 +dictGet dict_array (34.0986,0.0294206) 101 +dictGet dict_array (34.1072,3.15823) 101 +dictGet dict_array (34.1092,3.09599) 101 +dictGet dict_array (34.1206,1.04637) 5940222 +dictGet dict_array (34.1209,3.13826) 101 +dictGet dict_array (34.1265,3.95881) 101 +dictGet dict_array (34.1286,-0.539319) 101 +dictGet dict_array (34.1358,3.67451) 101 +dictGet dict_array (34.1428,0.136115) 101 +dictGet dict_array (34.157,1.73522) 101 +dictGet dict_array (34.1581,1.48001) 101 +dictGet dict_array (34.1682,3.42373) 101 +dictGet dict_array (34.1683,-1.26511) 101 +dictGet dict_array (34.1684,4.20007) 101 +dictGet dict_array (34.1854,3.32089) 101 +dictGet dict_array (34.2022,0.749536) 101 +dictGet dict_array (34.2044,3.04865) 101 +dictGet dict_array (34.22,-0.500055) 101 +dictGet dict_array (34.2249,0.743775) 101 +dictGet dict_array (34.2254,1.34702) 101 +dictGet dict_array (34.2355,-0.898843) 101 +dictGet dict_array (34.2394,2.0203699999999998) 101 +dictGet dict_array (34.2466,1.83785) 101 +dictGet dict_array (34.247,4.09563) 101 +dictGet dict_array (34.2508,2.61312) 101 +dictGet dict_array (34.2517,1.69642) 101 +dictGet dict_array (34.2564,4.13033) 101 +dictGet dict_array (34.2574,4.18928) 101 +dictGet dict_array (34.2614,-0.478719) 101 +dictGet dict_array (34.2625,2.38088) 101 +dictGet dict_array (34.2666,3.1503) 101 +dictGet dict_array (34.271,4.02223) 101 +dictGet dict_array (34.2727,0.514755) 101 +dictGet dict_array (34.278,1.98929) 101 +dictGet dict_array (34.2798,-0.199208) 101 +dictGet dict_array (34.2804,2.05184) 101 +dictGet dict_array (34.2945,-1.11051) 101 +dictGet dict_array (34.3168,-0.0829721) 101 +dictGet dict_array (34.3345,3.4358) 101 +dictGet dict_array (34.3377,1.13527) 5940222 +dictGet dict_array (34.3383,1.27891) 5940222 +dictGet dict_array (34.3391,1.47945) 5940222 +dictGet dict_array (34.3441,0.627014) 101 +dictGet dict_array (34.347,2.4853) 101 +dictGet dict_array (34.3514,2.16247) 101 +dictGet dict_array (34.3627,2.64533) 101 +dictGet dict_array (34.3682,-0.227501) 101 +dictGet dict_array (34.3756,4.21248) 101 +dictGet dict_array (34.379,3.96604) 101 +dictGet dict_array (34.3827,1.7518) 101 +dictGet dict_array (34.3912,2.8834) 101 +dictGet dict_array (34.3919,0.668829) 101 +dictGet dict_array (34.3949,2.00338) 101 +dictGet dict_array (34.3987,0.557268) 101 +dictGet dict_array (34.4111,0.768558) 101 +dictGet dict_array (34.4119,2.8742) 101 +dictGet dict_array (34.416,3.50841) 101 +dictGet dict_array (34.4212,1.24916) 5940222 +dictGet dict_array (34.4251,0.457029) 101 +dictGet dict_array (34.4274,-0.902559) 101 +dictGet dict_array (34.4325,4.03159) 101 +dictGet dict_array (34.438,1.63994) 101 +dictGet dict_array (34.4403,-0.177594) 101 +dictGet dict_array (34.4421,0.726712) 101 +dictGet dict_array (34.4517,2.98611) 101 +dictGet dict_array (34.4658,-1.312) 101 +dictGet dict_array (34.4732,-0.0681338) 101 +dictGet dict_array (34.4752,2.81646) 101 +dictGet dict_array (34.4914,2.3858) 101 +dictGet dict_array (34.4923,0.855231) 101 +dictGet dict_array (34.5235,1.78468) 101 +dictGet dict_array (34.5305,4.10608) 101 +dictGet dict_array (34.5389,0.621937) 101 +dictGet dict_array (34.5406,3.17145) 101 +dictGet dict_array (34.5434,-0.56306) 101 +dictGet dict_array (34.5449,3.13311) 101 +dictGet dict_array (34.5491,2.31572) 101 +dictGet dict_array (34.5539,2.94028) 101 +dictGet dict_array (34.5546,-0.208825) 101 +dictGet dict_array (34.5549,3.78486) 101 +dictGet dict_array (34.5676,0.307148) 101 +dictGet dict_array (34.5743,1.5217399999999999) 101 +dictGet dict_array (34.5775,3.48046) 101 +dictGet dict_array (34.5815,2.5243700000000002) 101 +dictGet dict_array (34.5841,4.21191) 101 +dictGet dict_array (34.5887,2.65083) 101 +dictGet dict_array (34.5937,3.2143) 101 +dictGet dict_array (34.6013,-1.0612) 101 +dictGet dict_array (34.6089,1.36066) 101 +dictGet dict_array (34.6103,3.40227) 101 +dictGet dict_array (34.6128,1.92276) 101 +dictGet dict_array (34.6175,2.43627) 101 +dictGet dict_array (34.6209,3.43776) 101 +dictGet dict_array (34.6234,2.60237) 101 +dictGet dict_array (34.6275,3.52479) 101 +dictGet dict_array (34.635,0.568558) 101 +dictGet dict_array (34.6373,2.37692) 101 +dictGet dict_array (34.6375,3.52234) 101 +dictGet dict_array (34.6426,2.12397) 101 +dictGet dict_array (34.6513,2.80915) 101 +dictGet dict_array (34.6632,2.30039) 101 +dictGet dict_array (34.6691,1.86582) 101 +dictGet dict_array (34.6739,0.15342) 101 +dictGet dict_array (34.6825,0.0499679) 101 +dictGet dict_array (34.6893,0.454326) 101 +dictGet dict_array (34.6957,-0.358598) 101 +dictGet dict_array (34.6986,0.562679) 101 +dictGet dict_array (34.712,1.12114) 101 +dictGet dict_array (34.7126,-0.0057301) 101 +dictGet dict_array (34.7137,0.0248501) 101 +dictGet dict_array (34.7162,1.15623) 101 +dictGet dict_array (34.7258,3.95142) 101 +dictGet dict_array (34.7347,3.5232099999999997) 101 +dictGet dict_array (34.7363,2.23374) 101 +dictGet dict_array (34.7375,0.397841) 101 +dictGet dict_array (34.7423,3.09198) 101 +dictGet dict_array (34.7452,3.09029) 101 +dictGet dict_array (34.7539,-1.06943) 101 +dictGet dict_array (34.7733,-0.00912717) 101 +dictGet dict_array (34.774,2.71088) 101 +dictGet dict_array (34.7771,1.46009) 101 +dictGet dict_array (34.7782,-1.28308) 101 +dictGet dict_array (34.7924,3.63564) 101 +dictGet dict_array (34.7939,-0.416676) 101 +dictGet dict_array (34.7964,-0.401773) 101 +dictGet dict_array (34.7974,0.0286873) 101 +dictGet dict_array (34.7975,3.05965) 101 +dictGet dict_array (34.8037,3.07263) 101 +dictGet dict_array (34.8254,-0.390284) 101 +dictGet dict_array (34.828,1.91869) 101 +dictGet dict_array (34.8289,3.71058) 101 +dictGet dict_array (34.8403,2.14606) 101 +dictGet dict_array (34.8437,2.20617) 101 +dictGet dict_array (34.8469,2.38435) 101 +dictGet dict_array (34.86,1.45705) 101 +dictGet dict_array (34.8612,0.914248) 101 +dictGet dict_array (34.8663,3.4215400000000002) 101 +dictGet dict_array (34.8724,-0.375144) 101 +dictGet dict_array (34.8795,3.29317) 101 +dictGet dict_array (34.8823,1.21988) 101 +dictGet dict_array (34.8834,1.07657) 101 +dictGet dict_array (34.8837,0.157648) 101 +dictGet dict_array (34.8871,-0.9755) 101 +dictGet dict_array (34.8871,1.8943699999999999) 101 +dictGet dict_array (34.889,3.36756) 101 +dictGet dict_array (34.8907,1.24874) 101 +dictGet dict_array (34.8965,3.13508) 101 +dictGet dict_array (34.9042,2.62092) 101 +dictGet dict_array (34.9055,-0.0448967) 101 +dictGet dict_array (34.9122,0.110576) 101 +dictGet dict_array (34.9228,3.60183) 101 +dictGet dict_array (34.9237,1.21715) 101 +dictGet dict_array (34.9296,1.70459) 101 +dictGet dict_array (34.941,-1.14663) 101 +dictGet dict_array (34.9448,1.18923) 101 +dictGet dict_array (34.9462,3.81678) 101 +dictGet dict_array (34.9466,0.593463) 101 +dictGet dict_array (34.9485,0.150307) 101 +dictGet dict_array (34.9542,0.487238) 101 +dictGet dict_array (34.9559,2.03473) 101 +dictGet dict_array (34.9671,-0.960225) 101 +dictGet dict_array (34.9711,2.63444) 101 +dictGet dict_array (34.9892,0.354775) 101 +dictGet dict_array (34.9907,1.40724) 101 +dictGet dict_array (34.9916,-0.00173097) 101 +dictGet dict_array (34.9919,2.06167) 101 diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh index c2a35a3ef63..9a26f78a8ee 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh @@ -1,31 +1,32 @@ #!/usr/bin/env bash -# Tags: no-debug, no-parallel +# Tags: no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -TMP_DIR="/tmp" +TMP_DIR=${CLICKHOUSE_TMP}/tmp +DATA_DIR=${CLICKHOUSE_TMP}/data +mkdir -p $TMP_DIR +mkdir -p $DATA_DIR declare -a SearchTypes=("POLYGON" "POLYGON_SIMPLE" "POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") -tar -xf "${CURDIR}"/01037_test_data_search.tar.gz -C "${CURDIR}" +tar -xf "${CURDIR}"/01037_test_data_search.tar.gz -C "${DATA_DIR}" $CLICKHOUSE_CLIENT -n --query=" -DROP DATABASE IF EXISTS test_01037; -CREATE DATABASE test_01037; -DROP TABLE IF EXISTS test_01037.points; -CREATE TABLE test_01037.points (x Float64, y Float64) ENGINE = Memory; +DROP TABLE IF EXISTS points; +CREATE TABLE points (x Float64, y Float64) ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.points FORMAT TSV" --max_insert_block_size=100000 < "${CURDIR}/01037_point_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --max_insert_block_size=100000 < "${DATA_DIR}/01037_point_data" -rm "${CURDIR}"/01037_point_data +rm "${DATA_DIR}"/01037_point_data $CLICKHOUSE_CLIENT -n --query=" -DROP TABLE IF EXISTS test_01037.polygons_array; +DROP TABLE IF EXISTS polygons_array; -CREATE TABLE test_01037.polygons_array +CREATE TABLE polygons_array ( key Array(Array(Array(Array(Float64)))), name String, @@ -34,36 +35,31 @@ CREATE TABLE test_01037.polygons_array ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${DATA_DIR}/01037_polygon_data" -rm "${CURDIR}"/01037_polygon_data +rm "${DATA_DIR}"/01037_polygon_data for type in "${SearchTypes[@]}"; do outputFile="${TMP_DIR}/results${type}.out" $CLICKHOUSE_CLIENT -n --query=" - DROP DICTIONARY IF EXISTS test_01037.dict_array; + DROP DICTIONARY IF EXISTS dict_array; - CREATE DICTIONARY test_01037.dict_array + CREATE DICTIONARY dict_array ( key Array(Array(Array(Array(Float64)))), name String DEFAULT 'qqq', value UInt64 DEFAULT 101 ) PRIMARY KEY key - SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037')) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB currentDatabase())) LIFETIME(0) LAYOUT($type()); - select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, - dictGet(dict_name, 'value', key) from test_01037.points order by x, y; + select 'dictGet', 'dict_array' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'value', key) from points order by x, y; " > "$outputFile" diff -q "${CURDIR}/01037_polygon_dicts_correctness_all.ans" "$outputFile" done - -$CLICKHOUSE_CLIENT -n --query=" -DROP TABLE test_01037.points; -DROP DATABASE test_01037; -" diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.ans b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.ans index 45fa7637421..297c8416096 100644 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.ans +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.ans @@ -1,1000 +1,1000 @@ -dictGet test_01037.dict_array (29.5699,2.50068) 101 -dictGet test_01037.dict_array (29.5796,1.55456) 101 -dictGet test_01037.dict_array (29.5796,2.36864) 101 -dictGet test_01037.dict_array (29.5844,1.59626) 101 -dictGet test_01037.dict_array (29.5886,4.03321) 101 -dictGet test_01037.dict_array (29.5914,3.02628) 101 -dictGet test_01037.dict_array (29.5926,-0.0965169) 101 -dictGet test_01037.dict_array (29.5968,2.37773) 101 -dictGet test_01037.dict_array (29.5984,0.755853) 101 -dictGet test_01037.dict_array (29.6066,3.47173) 101 -dictGet test_01037.dict_array (29.6085,-1.26007) 6489978 -dictGet test_01037.dict_array (29.6131,0.246565) 101 -dictGet test_01037.dict_array (29.6157,-0.266687) 101 -dictGet test_01037.dict_array (29.6164,2.94674) 101 -dictGet test_01037.dict_array (29.6195,-0.591941) 101 -dictGet test_01037.dict_array (29.6231,1.54818) 101 -dictGet test_01037.dict_array (29.6379,0.764114) 101 -dictGet test_01037.dict_array (29.6462,-0.772059) 934530 -dictGet test_01037.dict_array (29.6579,-1.07336) 6489978 -dictGet test_01037.dict_array (29.6618,-0.271842) 101 -dictGet test_01037.dict_array (29.6629,-0.303602) 101 -dictGet test_01037.dict_array (29.6659,-0.782823) 934530 -dictGet test_01037.dict_array (29.6736,-0.113832) 101 -dictGet test_01037.dict_array (29.6759,3.02905) 101 -dictGet test_01037.dict_array (29.6778,3.71898) 101 -dictGet test_01037.dict_array (29.6796,1.10433) 101 -dictGet test_01037.dict_array (29.6809,2.13677) 101 -dictGet test_01037.dict_array (29.6935,4.11894) 101 -dictGet test_01037.dict_array (29.6991,-1.4458199999999999) 101 -dictGet test_01037.dict_array (29.6997,3.17297) 101 -dictGet test_01037.dict_array (29.7043,3.6145899999999997) 101 -dictGet test_01037.dict_array (29.7065,3.24885) 101 -dictGet test_01037.dict_array (29.7126,0.28108) 101 -dictGet test_01037.dict_array (29.7192,0.174273) 101 -dictGet test_01037.dict_array (29.7217,-0.523481) 3501900 -dictGet test_01037.dict_array (29.7271,1.67967) 101 -dictGet test_01037.dict_array (29.7311,4.12444) 101 -dictGet test_01037.dict_array (29.7347,1.88378) 101 -dictGet test_01037.dict_array (29.7358,0.67944) 101 -dictGet test_01037.dict_array (29.7366,-0.2973) 101 -dictGet test_01037.dict_array (29.7446,0.646536) 101 -dictGet test_01037.dict_array (29.7453,-0.567963) 3501900 -dictGet test_01037.dict_array (29.764,4.04217) 101 -dictGet test_01037.dict_array (29.7655,1.51372) 101 -dictGet test_01037.dict_array (29.7744,1.12435) 101 -dictGet test_01037.dict_array (29.7774,-0.0681196) 3501895 -dictGet test_01037.dict_array (29.7784,1.54864) 101 -dictGet test_01037.dict_array (29.7785,2.24139) 101 -dictGet test_01037.dict_array (29.7922,0.220808) 101 -dictGet test_01037.dict_array (29.7936,2.37709) 101 -dictGet test_01037.dict_array (29.8008,0.948536) 101 -dictGet test_01037.dict_array (29.8115,0.201227) 101 -dictGet test_01037.dict_array (29.814,0.149601) 3501895 -dictGet test_01037.dict_array (29.8193,-1.35858) 101 -dictGet test_01037.dict_array (29.8201,0.965518) 101 -dictGet test_01037.dict_array (29.8265,-0.727286) 3501900 -dictGet test_01037.dict_array (29.8277,-0.531746) 3501900 -dictGet test_01037.dict_array (29.8289,3.63009) 101 -dictGet test_01037.dict_array (29.8548,0.838047) 101 -dictGet test_01037.dict_array (29.8641,-0.845265) 3501900 -dictGet test_01037.dict_array (29.8649,0.0562212) 3501895 -dictGet test_01037.dict_array (29.8701,-1.02045) 934530 -dictGet test_01037.dict_array (29.8733,2.76654) 101 -dictGet test_01037.dict_array (29.876,0.555475) 101 -dictGet test_01037.dict_array (29.8794,-0.800108) 3501900 -dictGet test_01037.dict_array (29.8813,2.7426399999999997) 101 -dictGet test_01037.dict_array (29.897100000000002,2.66193) 101 -dictGet test_01037.dict_array (29.908,4.01339) 101 -dictGet test_01037.dict_array (29.9165,-1.08246) 3501894 -dictGet test_01037.dict_array (29.9201,-0.420861) 3498054 -dictGet test_01037.dict_array (29.9217,3.03778) 101 -dictGet test_01037.dict_array (29.9355,0.773833) 101 -dictGet test_01037.dict_array (29.947,3.76517) 101 -dictGet test_01037.dict_array (29.9518,-0.60557) 3498056 -dictGet test_01037.dict_array (29.9564,-0.600163) 3498056 -dictGet test_01037.dict_array (29.959600000000002,4.16591) 101 -dictGet test_01037.dict_array (29.9615,-1.33708) 3501894 -dictGet test_01037.dict_array (29.9699,-0.392375) 3498054 -dictGet test_01037.dict_array (29.9776,1.04552) 101 -dictGet test_01037.dict_array (29.9784,4.02756) 101 -dictGet test_01037.dict_array (29.9819,4.00597) 101 -dictGet test_01037.dict_array (29.9826,1.2816100000000001) 101 -dictGet test_01037.dict_array (30.0026,2.76257) 101 -dictGet test_01037.dict_array (30.0126,3.68255) 101 -dictGet test_01037.dict_array (30.0131,0.796576) 3501892 -dictGet test_01037.dict_array (30.018,1.16523) 101 -dictGet test_01037.dict_array (30.0261,-0.210653) 3501896 -dictGet test_01037.dict_array (30.0472,-1.11007) 3501894 -dictGet test_01037.dict_array (30.0542,-0.479585) 3498054 -dictGet test_01037.dict_array (30.0613,1.6278000000000001) 101 -dictGet test_01037.dict_array (30.0617,-0.0551152) 3501895 -dictGet test_01037.dict_array (30.0637,2.62066) 101 -dictGet test_01037.dict_array (30.0721,1.6424400000000001) 101 -dictGet test_01037.dict_array (30.0769,-0.402636) 3498054 -dictGet test_01037.dict_array (30.0791,-0.277435) 3501896 -dictGet test_01037.dict_array (30.0931,0.0327512) 3501895 -dictGet test_01037.dict_array (30.1059,3.52623) 101 -dictGet test_01037.dict_array (30.1103,0.865466) 3501892 -dictGet test_01037.dict_array (30.1115,2.95243) 101 -dictGet test_01037.dict_array (30.1144,1.71029) 101 -dictGet test_01037.dict_array (30.1311,-0.864751) 3501899 -dictGet test_01037.dict_array (30.1336,-0.851386) 3501899 -dictGet test_01037.dict_array (30.1393,3.89901) 101 -dictGet test_01037.dict_array (30.1456,-0.531898) 3498054 -dictGet test_01037.dict_array (30.1492,2.07833) 101 -dictGet test_01037.dict_array (30.1575,2.43856) 101 -dictGet test_01037.dict_array (30.1682,1.19771) 101 -dictGet test_01037.dict_array (30.1716,3.9853300000000003) 101 -dictGet test_01037.dict_array (30.1849,2.78374) 101 -dictGet test_01037.dict_array (30.1866,0.65658) 3498021 -dictGet test_01037.dict_array (30.1885,1.56943) 101 -dictGet test_01037.dict_array (30.1959,-1.38202) 101 -dictGet test_01037.dict_array (30.1999,1.58413) 101 -dictGet test_01037.dict_array (30.2024,0.713081) 3498021 -dictGet test_01037.dict_array (30.2054,0.620143) 3498021 -dictGet test_01037.dict_array (30.2091,1.51641) 101 -dictGet test_01037.dict_array (30.2124,-0.331782) 3498031 -dictGet test_01037.dict_array (30.226,3.03527) 101 -dictGet test_01037.dict_array (30.2261,3.18486) 101 -dictGet test_01037.dict_array (30.2288,2.48407) 101 -dictGet test_01037.dict_array (30.2345,3.7462400000000002) 101 -dictGet test_01037.dict_array (30.2375,0.62046) 3498021 -dictGet test_01037.dict_array (30.2425,-0.472914) 3498054 -dictGet test_01037.dict_array (30.247,3.95863) 101 -dictGet test_01037.dict_array (30.2494,-0.305093) 3498031 -dictGet test_01037.dict_array (30.2499,2.54337) 101 -dictGet test_01037.dict_array (30.2606,2.16644) 101 -dictGet test_01037.dict_array (30.2672,3.94847) 101 -dictGet test_01037.dict_array (30.2709,-0.136264) 6088794 -dictGet test_01037.dict_array (30.2764,1.18654) 101 -dictGet test_01037.dict_array (30.2765,1.20383) 101 -dictGet test_01037.dict_array (30.2839,1.05762) 3498024 -dictGet test_01037.dict_array (30.286,0.469327) 3498021 -dictGet test_01037.dict_array (30.2927,3.1693) 101 -dictGet test_01037.dict_array (30.2935,3.49854) 101 -dictGet test_01037.dict_array (30.307,0.312338) 3498021 -dictGet test_01037.dict_array (30.3085,1.07791) 3498024 -dictGet test_01037.dict_array (30.3139,2.77248) 101 -dictGet test_01037.dict_array (30.314,0.822823) 3498024 -dictGet test_01037.dict_array (30.3227,-0.587351) 3498055 -dictGet test_01037.dict_array (30.332,1.00174) 3498024 -dictGet test_01037.dict_array (30.3388,0.844148) 3498024 -dictGet test_01037.dict_array (30.3485,0.561902) 3498021 -dictGet test_01037.dict_array (30.3497,0.180362) 6489998 -dictGet test_01037.dict_array (30.361,4.13016) 101 -dictGet test_01037.dict_array (30.3623,-0.0484027) 6489998 -dictGet test_01037.dict_array (30.3638,3.9845800000000002) 101 -dictGet test_01037.dict_array (30.3853,3.16051) 101 -dictGet test_01037.dict_array (30.3974,2.6617800000000003) 101 -dictGet test_01037.dict_array (30.4002,-1.15886) 101 -dictGet test_01037.dict_array (30.4008,-0.387015) 3498031 -dictGet test_01037.dict_array (30.4018,1.86493) 101 -dictGet test_01037.dict_array (30.4239,1.16818) 3498024 -dictGet test_01037.dict_array (30.4363,3.63938) 101 -dictGet test_01037.dict_array (30.4377,-0.81315) 3498063 -dictGet test_01037.dict_array (30.4391,3.54703) 101 -dictGet test_01037.dict_array (30.4424,-1.39435) 101 -dictGet test_01037.dict_array (30.4441,2.8463000000000003) 101 -dictGet test_01037.dict_array (30.4517,3.28117) 101 -dictGet test_01037.dict_array (30.4658,2.6928) 101 -dictGet test_01037.dict_array (30.4734,2.66161) 101 -dictGet test_01037.dict_array (30.4799,-1.07578) 101 -dictGet test_01037.dict_array (30.4837,-1.02486) 3501899 -dictGet test_01037.dict_array (30.485,1.06326) 3498024 -dictGet test_01037.dict_array (30.495,1.12306) 101 -dictGet test_01037.dict_array (30.501,2.27264) 101 -dictGet test_01037.dict_array (30.5027,1.99382) 101 -dictGet test_01037.dict_array (30.5194,-1.03943) 3501893 -dictGet test_01037.dict_array (30.5239,1.04328) 101 -dictGet test_01037.dict_array (30.528,3.82041) 101 -dictGet test_01037.dict_array (30.5299,-0.715248) 3498063 -dictGet test_01037.dict_array (30.5331,1.19603) 101 -dictGet test_01037.dict_array (30.535800000000002,2.71485) 101 -dictGet test_01037.dict_array (30.5405,0.804694) 3498023 -dictGet test_01037.dict_array (30.542,1.23739) 101 -dictGet test_01037.dict_array (30.5432,4.04189) 101 -dictGet test_01037.dict_array (30.5457,-0.956121) 3501893 -dictGet test_01037.dict_array (30.5506,3.07443) 101 -dictGet test_01037.dict_array (30.5539,3.87084) 101 -dictGet test_01037.dict_array (30.5578,3.78837) 101 -dictGet test_01037.dict_array (30.5588,0.966135) 3498022 -dictGet test_01037.dict_array (30.5637,2.5605) 101 -dictGet test_01037.dict_array (30.5647,-1.27328) 101 -dictGet test_01037.dict_array (30.5656,-0.0581332) 6088794 -dictGet test_01037.dict_array (30.5715,0.65755) 3498023 -dictGet test_01037.dict_array (30.5727,3.01604) 101 -dictGet test_01037.dict_array (30.5729,-0.976857) 3501893 -dictGet test_01037.dict_array (30.5751,0.60204) 3498023 -dictGet test_01037.dict_array (30.5854,3.02473) 101 -dictGet test_01037.dict_array (30.5866,0.174099) 6489998 -dictGet test_01037.dict_array (30.5947,0.875193) 3498023 -dictGet test_01037.dict_array (30.5992,-0.403901) 3498063 -dictGet test_01037.dict_array (30.6002,4.18891) 101 -dictGet test_01037.dict_array (30.6025,0.217712) 6489998 -dictGet test_01037.dict_array (30.6054,0.927203) 3498022 -dictGet test_01037.dict_array (30.6075,3.79359) 101 -dictGet test_01037.dict_array (30.6159,3.82773) 101 -dictGet test_01037.dict_array (30.627,3.84039) 101 -dictGet test_01037.dict_array (30.6308,0.77517) 3498023 -dictGet test_01037.dict_array (30.6338,0.179565) 6489998 -dictGet test_01037.dict_array (30.6461,1.3293599999999999) 101 -dictGet test_01037.dict_array (30.6674,-0.424547) 3498063 -dictGet test_01037.dict_array (30.669,1.76539) 101 -dictGet test_01037.dict_array (30.6788,4.01239) 101 -dictGet test_01037.dict_array (30.6864,3.59158) 101 -dictGet test_01037.dict_array (30.7049,-0.875413) 3501893 -dictGet test_01037.dict_array (30.705,1.3307) 101 -dictGet test_01037.dict_array (30.7063,-0.473192) 3498063 -dictGet test_01037.dict_array (30.7075,-1.1958199999999999) 101 -dictGet test_01037.dict_array (30.7101,-0.367562) 3498012 -dictGet test_01037.dict_array (30.7203,2.98725) 101 -dictGet test_01037.dict_array (30.7213,2.2745699999999998) 101 -dictGet test_01037.dict_array (30.7446,-0.334144) 3498012 -dictGet test_01037.dict_array (30.7468,3.82967) 101 -dictGet test_01037.dict_array (30.747,-0.384779) 3498012 -dictGet test_01037.dict_array (30.7681,0.904198) 3498022 -dictGet test_01037.dict_array (30.7757,1.78743) 101 -dictGet test_01037.dict_array (30.8021,-0.479212) 3498012 -dictGet test_01037.dict_array (30.8079,-1.40869) 101 -dictGet test_01037.dict_array (30.8206,-0.0608489) 3498012 -dictGet test_01037.dict_array (30.8218,0.43909) 3498023 -dictGet test_01037.dict_array (30.8239,0.10014) 3498012 -dictGet test_01037.dict_array (30.8282,4.15409) 101 -dictGet test_01037.dict_array (30.8288,-0.709528) 3501893 -dictGet test_01037.dict_array (30.8326,0.156011) 3498012 -dictGet test_01037.dict_array (30.8328,-1.03704) 101 -dictGet test_01037.dict_array (30.839,2.15528) 101 -dictGet test_01037.dict_array (30.8452,0.219377) 3498013 -dictGet test_01037.dict_array (30.8463,0.0515355) 3498012 -dictGet test_01037.dict_array (30.8526,2.06614) 101 -dictGet test_01037.dict_array (30.8566,0.517876) 3498023 -dictGet test_01037.dict_array (30.8588,-1.31738) 101 -dictGet test_01037.dict_array (30.8681,0.44207) 3498013 -dictGet test_01037.dict_array (30.8914,1.0072) 3498022 -dictGet test_01037.dict_array (30.897,0.483425) 3498013 -dictGet test_01037.dict_array (30.905,2.8731999999999998) 3501793 -dictGet test_01037.dict_array (30.9051,2.21956) 101 -dictGet test_01037.dict_array (30.9115,4.00663) 101 -dictGet test_01037.dict_array (30.9167,-0.834462) 3501893 -dictGet test_01037.dict_array (30.9252,-1.3289900000000001) 101 -dictGet test_01037.dict_array (30.9314,1.85384) 101 -dictGet test_01037.dict_array (30.9392,2.53236) 3501827 -dictGet test_01037.dict_array (30.9569,2.82038) 3501793 -dictGet test_01037.dict_array (30.9598,-0.641011) 3498012 -dictGet test_01037.dict_array (30.9601,-0.254928) 3498012 -dictGet test_01037.dict_array (30.9623,-1.3886) 101 -dictGet test_01037.dict_array (30.9707,0.888854) 3498022 -dictGet test_01037.dict_array (30.9766,2.81957) 3501793 -dictGet test_01037.dict_array (30.9775,2.69273) 3501793 -dictGet test_01037.dict_array (30.9821,0.587715) 3498013 -dictGet test_01037.dict_array (30.9887,4.0233) 101 -dictGet test_01037.dict_array (30.9914,0.259542) 3498013 -dictGet test_01037.dict_array (30.9986,-1.36832) 101 -dictGet test_01037.dict_array (31.008,0.628999) 3498013 -dictGet test_01037.dict_array (31.0168,-1.17462) 101 -dictGet test_01037.dict_array (31.0237,3.52547) 3501821 -dictGet test_01037.dict_array (31.0306,3.78522) 101 -dictGet test_01037.dict_array (31.0308,-0.72453) 3501893 -dictGet test_01037.dict_array (31.0463,2.41997) 3501825 -dictGet test_01037.dict_array (31.047,0.624184) 3498013 -dictGet test_01037.dict_array (31.0569,0.0706393) 3498015 -dictGet test_01037.dict_array (31.0583,1.3244099999999999) 3501926 -dictGet test_01037.dict_array (31.063,3.23861) 3501793 -dictGet test_01037.dict_array (31.068,0.695575) 3498022 -dictGet test_01037.dict_array (31.0687,1.85675) 101 -dictGet test_01037.dict_array (31.0692,0.254793) 3498014 -dictGet test_01037.dict_array (31.0766,0.828128) 3498022 -dictGet test_01037.dict_array (31.0833,0.0612782) 3498015 -dictGet test_01037.dict_array (31.0833,2.59748) 3501793 -dictGet test_01037.dict_array (31.0861,-1.3778299999999999) 101 -dictGet test_01037.dict_array (31.0874,3.07258) 3501793 -dictGet test_01037.dict_array (31.0882,1.4882) 3501926 -dictGet test_01037.dict_array (31.0924,3.42242) 3501821 -dictGet test_01037.dict_array (31.0927,2.67448) 3501793 -dictGet test_01037.dict_array (31.0936,1.12292) 3498022 -dictGet test_01037.dict_array (31.0952,-0.336928) 3498012 -dictGet test_01037.dict_array (31.0978,3.48482) 3501826 -dictGet test_01037.dict_array (31.1107,3.7513199999999998) 3501826 -dictGet test_01037.dict_array (31.1156,1.19171) 3501926 -dictGet test_01037.dict_array (31.1176,0.223509) 3498015 -dictGet test_01037.dict_array (31.1249,0.946838) 3498022 -dictGet test_01037.dict_array (31.1267,1.48983) 3501926 -dictGet test_01037.dict_array (31.138,-0.289981) 3501898 -dictGet test_01037.dict_array (31.1382,3.02904) 3501793 -dictGet test_01037.dict_array (31.1475,2.6178) 3501793 -dictGet test_01037.dict_array (31.1491,1.37873) 3501926 -dictGet test_01037.dict_array (31.1525,3.72105) 3501826 -dictGet test_01037.dict_array (31.1526,-1.4129800000000001) 101 -dictGet test_01037.dict_array (31.1526,-0.186457) 3501898 -dictGet test_01037.dict_array (31.1539,2.78789) 3501793 -dictGet test_01037.dict_array (31.1548,-1.08552) 101 -dictGet test_01037.dict_array (31.1567,-0.0768925) 3501898 -dictGet test_01037.dict_array (31.1613,1.49617) 3501926 -dictGet test_01037.dict_array (31.1653,1.03777) 3498022 -dictGet test_01037.dict_array (31.1662,3.4214700000000002) 3501826 -dictGet test_01037.dict_array (31.1672,-0.0813169) 3501898 -dictGet test_01037.dict_array (31.177,0.440843) 3498014 -dictGet test_01037.dict_array (31.1788,-0.737151) 3501893 -dictGet test_01037.dict_array (31.1856,-0.144396) 3501898 -dictGet test_01037.dict_array (31.1959,3.66813) 3501826 -dictGet test_01037.dict_array (31.1996,-0.353983) 3501898 -dictGet test_01037.dict_array (31.2019,2.86802) 3501793 -dictGet test_01037.dict_array (31.2087,2.31245) 3501825 -dictGet test_01037.dict_array (31.2125,3.2713200000000002) 3501793 -dictGet test_01037.dict_array (31.2137,-0.108129) 3501898 -dictGet test_01037.dict_array (31.216,3.9156) 101 -dictGet test_01037.dict_array (31.2201,-0.202141) 3501898 -dictGet test_01037.dict_array (31.2285,2.09058) 101 -dictGet test_01037.dict_array (31.2502,4.01526) 101 -dictGet test_01037.dict_array (31.2585,3.11524) 3501793 -dictGet test_01037.dict_array (31.2645,-0.620418) 3501890 -dictGet test_01037.dict_array (31.2684,2.74277) 3501793 -dictGet test_01037.dict_array (31.2821,-1.12772) 101 -dictGet test_01037.dict_array (31.2821,2.46769) 3501825 -dictGet test_01037.dict_array (31.2887,3.91396) 101 -dictGet test_01037.dict_array (31.295,1.49942) 3501926 -dictGet test_01037.dict_array (31.2997,3.46122) 3501826 -dictGet test_01037.dict_array (31.3017,3.3263) 3501826 -dictGet test_01037.dict_array (31.3022,3.16754) 3501793 -dictGet test_01037.dict_array (31.3048,0.364962) 3498014 -dictGet test_01037.dict_array (31.305,3.1967) 3501793 -dictGet test_01037.dict_array (31.3061,1.84303) 101 -dictGet test_01037.dict_array (31.3082,-0.173851) 3501898 -dictGet test_01037.dict_array (31.3315,3.90932) 101 -dictGet test_01037.dict_array (31.3351,2.80164) 3501793 -dictGet test_01037.dict_array (31.3388,0.168765) 3498015 -dictGet test_01037.dict_array (31.339,0.25535) 3498094 -dictGet test_01037.dict_array (31.3423,1.7036799999999999) 3501926 -dictGet test_01037.dict_array (31.349,0.386456) 3498014 -dictGet test_01037.dict_array (31.3558,-1.04336) 101 -dictGet test_01037.dict_array (31.3564,0.478876) 3498014 -dictGet test_01037.dict_array (31.3607,-0.0860507) 3498015 -dictGet test_01037.dict_array (31.3831,3.84469) 101 -dictGet test_01037.dict_array (31.3886,-0.731137) 3501890 -dictGet test_01037.dict_array (31.4043,-0.348907) 5457271 -dictGet test_01037.dict_array (31.4081,1.47391) 3501926 -dictGet test_01037.dict_array (31.4176,-0.583645) 5457271 -dictGet test_01037.dict_array (31.4177,1.36972) 3501926 -dictGet test_01037.dict_array (31.4182,0.958303) 3498022 -dictGet test_01037.dict_array (31.4199,3.1738) 3501793 -dictGet test_01037.dict_array (31.4221,2.74876) 3501825 -dictGet test_01037.dict_array (31.4301,-0.122643) 3498015 -dictGet test_01037.dict_array (31.4344,1.00661) 3498022 -dictGet test_01037.dict_array (31.4375,4.20304) 101 -dictGet test_01037.dict_array (31.4377,0.289608) 3498094 -dictGet test_01037.dict_array (31.4379,0.54744) 3498014 -dictGet test_01037.dict_array (31.4459,3.94945) 101 -dictGet test_01037.dict_array (31.4559,-0.345063) 5457271 -dictGet test_01037.dict_array (31.464,0.726129) 3498014 -dictGet test_01037.dict_array (31.4662,-0.299019) 3498015 -dictGet test_01037.dict_array (31.4671,1.9605299999999999) 3501794 -dictGet test_01037.dict_array (31.4673,-0.403676) 5457271 -dictGet test_01037.dict_array (31.4712,-0.237941) 3498015 -dictGet test_01037.dict_array (31.4816,0.120264) 3498015 -dictGet test_01037.dict_array (31.4875,0.323483) 3498014 -dictGet test_01037.dict_array (31.490099999999998,-0.338163) 5457271 -dictGet test_01037.dict_array (31.4932,0.517674) 3498014 -dictGet test_01037.dict_array (31.5112,1.9689299999999998) 3501794 -dictGet test_01037.dict_array (31.5122,2.92785) 3501791 -dictGet test_01037.dict_array (31.5151,0.166429) 3498094 -dictGet test_01037.dict_array (31.5174,2.94802) 3501791 -dictGet test_01037.dict_array (31.5182,4.18776) 101 -dictGet test_01037.dict_array (31.5238,1.18793) 3498003 -dictGet test_01037.dict_array (31.5271,3.07446) 3501791 -dictGet test_01037.dict_array (31.5393,1.58061) 3501794 -dictGet test_01037.dict_array (31.5421,3.13711) 3501791 -dictGet test_01037.dict_array (31.5479,2.39897) 3497970 -dictGet test_01037.dict_array (31.5519,0.99285) 3498003 -dictGet test_01037.dict_array (31.5685,3.47987) 3501824 -dictGet test_01037.dict_array (31.5959,0.437382) 3498014 -dictGet test_01037.dict_array (31.6003,0.194376) 3498094 -dictGet test_01037.dict_array (31.6026,2.15457) 3501794 -dictGet test_01037.dict_array (31.606,2.45365) 3497970 -dictGet test_01037.dict_array (31.6062,-0.453441) 3501890 -dictGet test_01037.dict_array (31.6107,1.35247) 3497974 -dictGet test_01037.dict_array (31.6155,3.85588) 101 -dictGet test_01037.dict_array (31.6222,2.03326) 3501794 -dictGet test_01037.dict_array (31.6231,-0.123059) 3498083 -dictGet test_01037.dict_array (31.6244,1.6885599999999998) 3497974 -dictGet test_01037.dict_array (31.6459,0.669716) 3498014 -dictGet test_01037.dict_array (31.6563,-0.0644741) 3498083 -dictGet test_01037.dict_array (31.6618,-0.551121) 3501890 -dictGet test_01037.dict_array (31.6725,-0.38922) 3498085 -dictGet test_01037.dict_array (31.6727,4.10336) 101 -dictGet test_01037.dict_array (31.6739,4.1391) 101 -dictGet test_01037.dict_array (31.6897,2.8694699999999997) 3501792 -dictGet test_01037.dict_array (31.6902,3.98792) 101 -dictGet test_01037.dict_array (31.6945,2.46687) 3497970 -dictGet test_01037.dict_array (31.6987,-1.3796) 101 -dictGet test_01037.dict_array (31.7012,2.34845) 3497970 -dictGet test_01037.dict_array (31.7036,0.0228348) 3501888 -dictGet test_01037.dict_array (31.7046,3.68111) 3501824 -dictGet test_01037.dict_array (31.7055,2.92556) 3501792 -dictGet test_01037.dict_array (31.7102,1.04532) 3498003 -dictGet test_01037.dict_array (31.7149,-0.443302) 3498085 -dictGet test_01037.dict_array (31.7195,2.99311) 3501791 -dictGet test_01037.dict_array (31.7274,0.166719) 3498094 -dictGet test_01037.dict_array (31.7565,-0.565382) 3498085 -dictGet test_01037.dict_array (31.7615,0.771626) 3498014 -dictGet test_01037.dict_array (31.7739,1.8970099999999999) 3497974 -dictGet test_01037.dict_array (31.7848,1.2623199999999999) 3498003 -dictGet test_01037.dict_array (31.7912,-0.788599) 101 -dictGet test_01037.dict_array (31.8011,2.65853) 3497970 -dictGet test_01037.dict_array (31.8032,-0.0590108) 3501888 -dictGet test_01037.dict_array (31.8038,1.9618799999999998) 3497974 -dictGet test_01037.dict_array (31.8098,-1.46851) 101 -dictGet test_01037.dict_array (31.8131,3.41982) 3501791 -dictGet test_01037.dict_array (31.8169,3.31059) 3501791 -dictGet test_01037.dict_array (31.8202,-0.193692) 3501888 -dictGet test_01037.dict_array (31.8306,1.57586) 3497974 -dictGet test_01037.dict_array (31.8382,-0.787948) 101 -dictGet test_01037.dict_array (31.8433,2.49692) 3497970 -dictGet test_01037.dict_array (31.8436,2.41851) 3497970 -dictGet test_01037.dict_array (31.8563,-1.10787) 101 -dictGet test_01037.dict_array (31.8683,0.996504) 3498002 -dictGet test_01037.dict_array (31.8693,-0.828142) 101 -dictGet test_01037.dict_array (31.8723,1.08929) 3498003 -dictGet test_01037.dict_array (31.8737,0.881127) 3498002 -dictGet test_01037.dict_array (31.8881,-0.58441) 101 -dictGet test_01037.dict_array (31.9011,0.121349) 3498094 -dictGet test_01037.dict_array (31.9066,2.13045) 3497965 -dictGet test_01037.dict_array (31.9142,1.03368) 3498002 -dictGet test_01037.dict_array (31.9155,3.38363) 3501791 -dictGet test_01037.dict_array (31.9168,1.3166) 3498004 -dictGet test_01037.dict_array (31.9185,-1.11879) 101 -dictGet test_01037.dict_array (31.9186,-0.647948) 101 -dictGet test_01037.dict_array (31.9311,3.96928) 101 -dictGet test_01037.dict_array (31.9335,1.47048) 3497974 -dictGet test_01037.dict_array (31.9443,-1.36175) 101 -dictGet test_01037.dict_array (31.9481,2.34231) 3497970 -dictGet test_01037.dict_array (31.9526,1.36565) 3498004 -dictGet test_01037.dict_array (31.9629,2.5208399999999997) 3497970 -dictGet test_01037.dict_array (31.9765,0.975783) 3498002 -dictGet test_01037.dict_array (31.9923,3.31773) 3501791 -dictGet test_01037.dict_array (31.9994,0.972816) 3498002 -dictGet test_01037.dict_array (32.001,3.47425) 3501791 -dictGet test_01037.dict_array (32.0127,2.13874) 3497965 -dictGet test_01037.dict_array (32.0244,3.2092) 3501792 -dictGet test_01037.dict_array (32.029,1.18039) 3498004 -dictGet test_01037.dict_array (32.0315,0.566073) 3498095 -dictGet test_01037.dict_array (32.0354,1.0766499999999999) 3498004 -dictGet test_01037.dict_array (32.0399,-1.11576) 101 -dictGet test_01037.dict_array (32.053,2.16849) 3497965 -dictGet test_01037.dict_array (32.0542,0.042328) 3498096 -dictGet test_01037.dict_array (32.0576,2.47001) 3497970 -dictGet test_01037.dict_array (32.061,3.7498899999999997) 101 -dictGet test_01037.dict_array (32.0623,1.25134) 3498004 -dictGet test_01037.dict_array (32.0626,1.9611399999999999) 3497965 -dictGet test_01037.dict_array (32.0666,-0.0904247) 3498096 -dictGet test_01037.dict_array (32.0681,2.28442) 3497970 -dictGet test_01037.dict_array (32.0692,1.50869) 3497981 -dictGet test_01037.dict_array (32.0724,4.03314) 101 -dictGet test_01037.dict_array (32.0729,-0.064324) 101 -dictGet test_01037.dict_array (32.079,0.293758) 3498094 -dictGet test_01037.dict_array (32.0847,-1.19814) 101 -dictGet test_01037.dict_array (32.0974,-0.91927) 101 -dictGet test_01037.dict_array (32.0979,-0.736979) 101 -dictGet test_01037.dict_array (32.106,-1.33063) 101 -dictGet test_01037.dict_array (32.1189,0.246715) 3498094 -dictGet test_01037.dict_array (32.1207,4.00883) 101 -dictGet test_01037.dict_array (32.1396,1.12402) 3498004 -dictGet test_01037.dict_array (32.1413,1.5668) 3497981 -dictGet test_01037.dict_array (32.143,1.35559) 3498004 -dictGet test_01037.dict_array (32.1538,1.32881) 3498004 -dictGet test_01037.dict_array (32.1549,4.06552) 101 -dictGet test_01037.dict_array (32.1555,-0.79275) 101 -dictGet test_01037.dict_array (32.163,1.17733) 3498004 -dictGet test_01037.dict_array (32.1634,2.94273) 3501792 -dictGet test_01037.dict_array (32.1644,1.85666) 3497965 -dictGet test_01037.dict_array (32.1745,0.435458) 3498095 -dictGet test_01037.dict_array (32.1765,1.65149) 3497981 -dictGet test_01037.dict_array (32.1893,2.08924) 3497965 -dictGet test_01037.dict_array (32.2024,0.222191) 3498093 -dictGet test_01037.dict_array (32.2107,1.34379) 3497981 -dictGet test_01037.dict_array (32.2109,3.9018699999999997) 101 -dictGet test_01037.dict_array (32.2123,1.85233) 3497965 -dictGet test_01037.dict_array (32.2144,3.72534) 101 -dictGet test_01037.dict_array (32.2218,2.5386699999999998) 3497970 -dictGet test_01037.dict_array (32.2279,2.84267) 3497245 -dictGet test_01037.dict_array (32.2345,3.33295) 3501792 -dictGet test_01037.dict_array (32.2435,3.85283) 101 -dictGet test_01037.dict_array (32.2527,-0.480608) 101 -dictGet test_01037.dict_array (32.2566,-0.837882) 101 -dictGet test_01037.dict_array (32.2627,2.57708) 3497970 -dictGet test_01037.dict_array (32.2733,0.244931) 3498096 -dictGet test_01037.dict_array (32.2761,4.05808) 101 -dictGet test_01037.dict_array (32.2764,3.78472) 101 -dictGet test_01037.dict_array (32.2814,-1.26011) 101 -dictGet test_01037.dict_array (32.2861,3.02427) 3497245 -dictGet test_01037.dict_array (32.2924,0.928609) 3498004 -dictGet test_01037.dict_array (32.2963,-0.78543) 101 -dictGet test_01037.dict_array (32.3039,3.21175) 3501792 -dictGet test_01037.dict_array (32.3107,0.698287) 3498004 -dictGet test_01037.dict_array (32.3138,0.0595677) 3498106 -dictGet test_01037.dict_array (32.3339,0.707056) 3498004 -dictGet test_01037.dict_array (32.3351,0.415474) 3498106 -dictGet test_01037.dict_array (32.342,-0.681023) 101 -dictGet test_01037.dict_array (32.3463,1.83196) 3497126 -dictGet test_01037.dict_array (32.3494,2.43799) 3497114 -dictGet test_01037.dict_array (32.3524,3.47049) 3501822 -dictGet test_01037.dict_array (32.3531,2.33115) 3497114 -dictGet test_01037.dict_array (32.3602,0.116106) 3498106 -dictGet test_01037.dict_array (32.3612,1.1598) 3498004 -dictGet test_01037.dict_array (32.3689,3.34847) 3501822 -dictGet test_01037.dict_array (32.3695,0.734055) 3498004 -dictGet test_01037.dict_array (32.3825,3.85017) 101 -dictGet test_01037.dict_array (32.3835,-1.25491) 101 -dictGet test_01037.dict_array (32.4018,-0.728568) 101 -dictGet test_01037.dict_array (32.4044,2.96727) 3497245 -dictGet test_01037.dict_array (32.4101,2.9988) 3497245 -dictGet test_01037.dict_array (32.417,-1.12908) 101 -dictGet test_01037.dict_array (32.4172,4.1952) 101 -dictGet test_01037.dict_array (32.4239,2.49512) 3497245 -dictGet test_01037.dict_array (32.4258,4.05137) 101 -dictGet test_01037.dict_array (32.4264,-0.427357) 101 -dictGet test_01037.dict_array (32.4274,3.59377) 3501822 -dictGet test_01037.dict_array (32.4286,-1.24757) 101 -dictGet test_01037.dict_array (32.4294,3.0665) 3497245 -dictGet test_01037.dict_array (32.4333,-0.353347) 101 -dictGet test_01037.dict_array (32.4391,3.64421) 3501822 -dictGet test_01037.dict_array (32.4401,3.70635) 3501822 -dictGet test_01037.dict_array (32.45,1.68918) 3497126 -dictGet test_01037.dict_array (32.4507,-0.133471) 101 -dictGet test_01037.dict_array (32.4592,0.976458) 3498105 -dictGet test_01037.dict_array (32.4595,1.89135) 3497126 -dictGet test_01037.dict_array (32.4604,0.280248) 3498106 -dictGet test_01037.dict_array (32.4835,0.472731) 3498106 -dictGet test_01037.dict_array (32.4855,2.01938) 3497126 -dictGet test_01037.dict_array (32.4872,2.01697) 3497126 -dictGet test_01037.dict_array (32.4911,0.613106) 3498105 -dictGet test_01037.dict_array (32.4918,2.17834) 3497114 -dictGet test_01037.dict_array (32.4947,2.34595) 3497114 -dictGet test_01037.dict_array (32.5035,2.92234) 3497245 -dictGet test_01037.dict_array (32.5132,-0.331206) 101 -dictGet test_01037.dict_array (32.5156,-0.412604) 3501887 -dictGet test_01037.dict_array (32.5158,2.9067499999999997) 3497245 -dictGet test_01037.dict_array (32.5249,2.44519) 3497114 -dictGet test_01037.dict_array (32.5293,-0.790952) 101 -dictGet test_01037.dict_array (32.5319,3.96854) 101 -dictGet test_01037.dict_array (32.5518,3.6093) 3501822 -dictGet test_01037.dict_array (32.5541,3.5225400000000002) 3501822 -dictGet test_01037.dict_array (32.5569,0.816123) 3498105 -dictGet test_01037.dict_array (32.5646,1.9775) 3497126 -dictGet test_01037.dict_array (32.5733,3.81271) 101 -dictGet test_01037.dict_array (32.5767,0.948327) 3498105 -dictGet test_01037.dict_array (32.5971,1.76179) 3497126 -dictGet test_01037.dict_array (32.6035,-0.716157) 101 -dictGet test_01037.dict_array (32.6087,4.21614) 101 -dictGet test_01037.dict_array (32.6171,0.024481) 101 -dictGet test_01037.dict_array (32.6189,-0.775391) 101 -dictGet test_01037.dict_array (32.6198,2.92081) 3497167 -dictGet test_01037.dict_array (32.621,-0.970784) 101 -dictGet test_01037.dict_array (32.6266,0.650009) 3498105 -dictGet test_01037.dict_array (32.6315,2.15144) 3497126 -dictGet test_01037.dict_array (32.6385,-0.436803) 101 -dictGet test_01037.dict_array (32.6449,-0.191292) 101 -dictGet test_01037.dict_array (32.6535,2.10385) 3497126 -dictGet test_01037.dict_array (32.6592,3.49973) 3501822 -dictGet test_01037.dict_array (32.6598,2.5980600000000003) 3497114 -dictGet test_01037.dict_array (32.6612,2.95681) 3497167 -dictGet test_01037.dict_array (32.6636,-0.57235) 101 -dictGet test_01037.dict_array (32.669,-0.382702) 101 -dictGet test_01037.dict_array (32.6752,1.30748) 3497981 -dictGet test_01037.dict_array (32.6811,2.9559800000000003) 3497167 -dictGet test_01037.dict_array (32.6821,0.57336) 3498105 -dictGet test_01037.dict_array (32.6828,3.91304) 101 -dictGet test_01037.dict_array (32.6979,3.96868) 101 -dictGet test_01037.dict_array (32.6983,3.15784) 3497167 -dictGet test_01037.dict_array (32.7122,0.794293) 3498105 -dictGet test_01037.dict_array (32.7131,-0.847256) 101 -dictGet test_01037.dict_array (32.7219,0.883461) 3498105 -dictGet test_01037.dict_array (32.7228,1.78808) 3497126 -dictGet test_01037.dict_array (32.7273,-0.206908) 101 -dictGet test_01037.dict_array (32.7292,0.259331) 3501889 -dictGet test_01037.dict_array (32.7304,-1.38317) 101 -dictGet test_01037.dict_array (32.7353,1.01601) 3498105 -dictGet test_01037.dict_array (32.7354,4.17574) 101 -dictGet test_01037.dict_array (32.7357,-0.190194) 101 -dictGet test_01037.dict_array (32.7465,-1.37598) 101 -dictGet test_01037.dict_array (32.7494,-0.275675) 101 -dictGet test_01037.dict_array (32.7514,0.128951) 3501889 -dictGet test_01037.dict_array (32.753,3.44207) 3501822 -dictGet test_01037.dict_array (32.7686,2.11713) 3497126 -dictGet test_01037.dict_array (32.7694,1.47159) 3497388 -dictGet test_01037.dict_array (32.7768,0.0401042) 101 -dictGet test_01037.dict_array (32.781,-1.34283) 101 -dictGet test_01037.dict_array (32.7814,1.73876) 3497388 -dictGet test_01037.dict_array (32.7856,-1.06363) 101 -dictGet test_01037.dict_array (32.792699999999996,-1.1255600000000001) 101 -dictGet test_01037.dict_array (32.7941,-0.645447) 101 -dictGet test_01037.dict_array (32.7946,1.48889) 3497388 -dictGet test_01037.dict_array (32.797,0.791753) 3501889 -dictGet test_01037.dict_array (32.7982,-0.537798) 101 -dictGet test_01037.dict_array (32.8091,2.3611) 3490438 -dictGet test_01037.dict_array (32.81,1.7130800000000002) 3497388 -dictGet test_01037.dict_array (32.8174,-0.288322) 101 -dictGet test_01037.dict_array (32.823,1.6546699999999999) 3497388 -dictGet test_01037.dict_array (32.8233,1.62108) 3497388 -dictGet test_01037.dict_array (32.8428,-0.400045) 101 -dictGet test_01037.dict_array (32.8479,2.13598) 3490438 -dictGet test_01037.dict_array (32.8524,0.199902) 3501889 -dictGet test_01037.dict_array (32.8543,3.23553) 3501820 -dictGet test_01037.dict_array (32.8562,1.31371) 3498117 -dictGet test_01037.dict_array (32.87,1.44256) 3498117 -dictGet test_01037.dict_array (32.8789,2.38192) 3490438 -dictGet test_01037.dict_array (32.8812,2.20734) 3497128 -dictGet test_01037.dict_array (32.8815,-0.54427) 101 -dictGet test_01037.dict_array (32.8853,2.4859) 3497128 -dictGet test_01037.dict_array (32.8909,0.513964) 3501889 -dictGet test_01037.dict_array (32.9035,2.38999) 3490438 -dictGet test_01037.dict_array (32.9097,2.48131) 3497128 -dictGet test_01037.dict_array (32.928,-0.943269) 101 -dictGet test_01037.dict_array (32.9322,1.13165) 3498104 -dictGet test_01037.dict_array (32.9348,1.22606) 3498117 -dictGet test_01037.dict_array (32.9417,3.77998) 3501822 -dictGet test_01037.dict_array (32.9428,3.11936) 3497167 -dictGet test_01037.dict_array (32.9482,1.18092) 3498118 -dictGet test_01037.dict_array (32.9506,0.0609364) 101 -dictGet test_01037.dict_array (32.953,-0.828308) 101 -dictGet test_01037.dict_array (32.9593,3.5209099999999998) 3501822 -dictGet test_01037.dict_array (32.9617,2.07711) 3497128 -dictGet test_01037.dict_array (32.966,0.693749) 3498104 -dictGet test_01037.dict_array (32.9668,-0.716432) 101 -dictGet test_01037.dict_array (32.9702,1.98555) 3497127 -dictGet test_01037.dict_array (32.9782,1.73819) 3497388 -dictGet test_01037.dict_array (32.9805,3.71151) 3501822 -dictGet test_01037.dict_array (32.9821,2.97225) 3497167 -dictGet test_01037.dict_array (32.995,-0.830301) 101 -dictGet test_01037.dict_array (33.0234,0.770848) 3498104 -dictGet test_01037.dict_array (33.0312,-0.340964) 101 -dictGet test_01037.dict_array (33.0366,-0.756795) 101 -dictGet test_01037.dict_array (33.0438,0.812871) 3498118 -dictGet test_01037.dict_array (33.0455,1.84843) 3497127 -dictGet test_01037.dict_array (33.0498,0.0913292) 101 -dictGet test_01037.dict_array (33.0506,1.53739) 3497364 -dictGet test_01037.dict_array (33.0554,2.4265) 3497363 -dictGet test_01037.dict_array (33.0741,3.61332) 3501822 -dictGet test_01037.dict_array (33.0765,-0.179985) 101 -dictGet test_01037.dict_array (33.087,1.46465) 3497399 -dictGet test_01037.dict_array (33.0906,-0.620383) 101 -dictGet test_01037.dict_array (33.1047,-1.28027) 101 -dictGet test_01037.dict_array (33.1072,1.96303) 3497127 -dictGet test_01037.dict_array (33.1081,-0.897874) 101 -dictGet test_01037.dict_array (33.1122,1.8950200000000001) 3497127 -dictGet test_01037.dict_array (33.1237,2.63993) 3497165 -dictGet test_01037.dict_array (33.1238,0.753963) 3498118 -dictGet test_01037.dict_array (33.1257,0.495668) 3498102 -dictGet test_01037.dict_array (33.1258,1.78341) 3497364 -dictGet test_01037.dict_array (33.127,2.59646) 3497166 -dictGet test_01037.dict_array (33.1324,-1.23742) 101 -dictGet test_01037.dict_array (33.1359,3.83491) 101 -dictGet test_01037.dict_array (33.1628,-0.379588) 101 -dictGet test_01037.dict_array (33.1679,1.25601) 3498117 -dictGet test_01037.dict_array (33.1688,-1.35553) 101 -dictGet test_01037.dict_array (33.181,2.10943) 3497363 -dictGet test_01037.dict_array (33.1871,2.81171) 3497165 -dictGet test_01037.dict_array (33.1877,0.771297) 3498118 -dictGet test_01037.dict_array (33.1883,-0.204797) 101 -dictGet test_01037.dict_array (33.1886,3.27998) 3501820 -dictGet test_01037.dict_array (33.1955,0.708907) 3498118 -dictGet test_01037.dict_array (33.2044,-0.769275) 101 -dictGet test_01037.dict_array (33.2182,3.36103) 3501820 -dictGet test_01037.dict_array (33.2192,3.43586) 3501822 -dictGet test_01037.dict_array (33.2322,-0.916753) 101 -dictGet test_01037.dict_array (33.2359,-0.81321) 101 -dictGet test_01037.dict_array (33.238,0.635072) 3498111 -dictGet test_01037.dict_array (33.2398,3.02588) 3497165 -dictGet test_01037.dict_array (33.2469,2.35698) 3497363 -dictGet test_01037.dict_array (33.247,2.3327) 3497363 -dictGet test_01037.dict_array (33.2579,2.8027100000000003) 3497165 -dictGet test_01037.dict_array (33.2607,0.321082) 101 -dictGet test_01037.dict_array (33.2653,0.243336) 101 -dictGet test_01037.dict_array (33.2758,0.831836) 3498118 -dictGet test_01037.dict_array (33.2771,0.886536) 3498118 -dictGet test_01037.dict_array (33.2914,1.16026) 3498117 -dictGet test_01037.dict_array (33.2914,1.38882) 3497399 -dictGet test_01037.dict_array (33.2982,-1.16604) 101 -dictGet test_01037.dict_array (33.2985,0.842556) 3498112 -dictGet test_01037.dict_array (33.3005,2.8338900000000002) 3497165 -dictGet test_01037.dict_array (33.305,0.0969475) 101 -dictGet test_01037.dict_array (33.3072,3.82163) 101 -dictGet test_01037.dict_array (33.312,3.41475) 3501820 -dictGet test_01037.dict_array (33.3129,2.46048) 3497166 -dictGet test_01037.dict_array (33.3134,3.46863) 3501820 -dictGet test_01037.dict_array (33.3203,2.33139) 3497166 -dictGet test_01037.dict_array (33.324,0.433701) 101 -dictGet test_01037.dict_array (33.3338,2.44705) 3497166 -dictGet test_01037.dict_array (33.337,4.06475) 101 -dictGet test_01037.dict_array (33.3469,1.08172) 3498126 -dictGet test_01037.dict_array (33.3538,0.717896) 3498112 -dictGet test_01037.dict_array (33.3618,1.37899) 3497399 -dictGet test_01037.dict_array (33.3698,0.547744) 3501862 -dictGet test_01037.dict_array (33.3705,0.957619) 3498112 -dictGet test_01037.dict_array (33.3821,3.07258) 3497165 -dictGet test_01037.dict_array (33.3881,3.0626) 3497165 -dictGet test_01037.dict_array (33.393,-0.816186) 101 -dictGet test_01037.dict_array (33.3945,0.869508) 3498110 -dictGet test_01037.dict_array (33.4001,1.24186) 3498117 -dictGet test_01037.dict_array (33.4008,2.34911) 3497166 -dictGet test_01037.dict_array (33.4166,-1.2808899999999999) 101 -dictGet test_01037.dict_array (33.4167,3.0655) 3497165 -dictGet test_01037.dict_array (33.4204,2.81887) 3497165 -dictGet test_01037.dict_array (33.4211,1.71128) 3497400 -dictGet test_01037.dict_array (33.4237,2.91761) 3497165 -dictGet test_01037.dict_array (33.4266,1.5955599999999999) 3497399 -dictGet test_01037.dict_array (33.4353,-0.391392) 101 -dictGet test_01037.dict_array (33.4362,-0.134658) 101 -dictGet test_01037.dict_array (33.4386,0.15396) 101 -dictGet test_01037.dict_array (33.4421,-0.50712) 101 -dictGet test_01037.dict_array (33.452,0.915829) 3498126 -dictGet test_01037.dict_array (33.463,-0.0882717) 101 -dictGet test_01037.dict_array (33.464,-1.00949) 101 -dictGet test_01037.dict_array (33.4692,0.954092) 3498126 -dictGet test_01037.dict_array (33.4716,1.9538799999999998) 3497400 -dictGet test_01037.dict_array (33.4756,1.85836) 3497400 -dictGet test_01037.dict_array (33.4859,4.0751) 101 -dictGet test_01037.dict_array (33.4899,3.54193) 3501820 -dictGet test_01037.dict_array (33.4935,3.49794) 3501820 -dictGet test_01037.dict_array (33.494,-0.983356) 101 -dictGet test_01037.dict_array (33.4955,-1.28128) 101 -dictGet test_01037.dict_array (33.4965,-0.278687) 101 -dictGet test_01037.dict_array (33.4991,0.647491) 3498110 -dictGet test_01037.dict_array (33.5076,2.2272) 3497424 -dictGet test_01037.dict_array (33.5079,-0.498199) 101 -dictGet test_01037.dict_array (33.5157,0.535034) 3501862 -dictGet test_01037.dict_array (33.5171,2.49677) 3497166 -dictGet test_01037.dict_array (33.5255,2.4447200000000002) 3497166 -dictGet test_01037.dict_array (33.526,4.01194) 101 -dictGet test_01037.dict_array (33.5288,0.789434) 3498110 -dictGet test_01037.dict_array (33.5356,-1.17671) 101 -dictGet test_01037.dict_array (33.5402,1.49152) 3497399 -dictGet test_01037.dict_array (33.5418,3.45757) 3501820 -dictGet test_01037.dict_array (33.5428,1.90712) 3497400 -dictGet test_01037.dict_array (33.5556,-0.55741) 101 -dictGet test_01037.dict_array (33.5564,0.876858) 3498128 -dictGet test_01037.dict_array (33.5567,-0.10208) 101 -dictGet test_01037.dict_array (33.5645,-0.124824) 101 -dictGet test_01037.dict_array (33.5663,3.4872) 3501820 -dictGet test_01037.dict_array (33.5716,-0.0107611) 101 -dictGet test_01037.dict_array (33.578,3.55714) 3501820 -dictGet test_01037.dict_array (33.5826,-0.49076) 101 -dictGet test_01037.dict_array (33.5909,0.773737) 3498110 -dictGet test_01037.dict_array (33.5958,2.9619999999999997) 3497425 -dictGet test_01037.dict_array (33.6193,-0.919755) 101 -dictGet test_01037.dict_array (33.6313,0.652132) 3498110 -dictGet test_01037.dict_array (33.632,0.823351) 3498128 -dictGet test_01037.dict_array (33.66,2.18998) 3497424 -dictGet test_01037.dict_array (33.6621,0.535395) 3498135 -dictGet test_01037.dict_array (33.6726,3.19367) 3497438 -dictGet test_01037.dict_array (33.6912,1.74522) 3497400 -dictGet test_01037.dict_array (33.705,0.706397) 3498135 -dictGet test_01037.dict_array (33.7076,0.7622) 3498128 -dictGet test_01037.dict_array (33.7112,1.70187) 3497400 -dictGet test_01037.dict_array (33.7246,-1.14837) 101 -dictGet test_01037.dict_array (33.7326,2.62413) 3497425 -dictGet test_01037.dict_array (33.7332,2.82137) 3497425 -dictGet test_01037.dict_array (33.7434,0.394672) 3498135 -dictGet test_01037.dict_array (33.7443,1.54557) 3497398 -dictGet test_01037.dict_array (33.7506,1.57317) 3497398 -dictGet test_01037.dict_array (33.7526,1.8578999999999999) 3497424 -dictGet test_01037.dict_array (33.766,4.15013) 101 -dictGet test_01037.dict_array (33.7834,2.41789) 3497439 -dictGet test_01037.dict_array (33.7864,0.230935) 101 -dictGet test_01037.dict_array (33.7965,3.05709) 3497438 -dictGet test_01037.dict_array (33.7998,3.32881) 3497438 -dictGet test_01037.dict_array (33.8003,2.97338) 3497425 -dictGet test_01037.dict_array (33.8007,-1.08962) 101 -dictGet test_01037.dict_array (33.8022,-0.139488) 101 -dictGet test_01037.dict_array (33.8065,2.70857) 3497425 -dictGet test_01037.dict_array (33.8169,-0.607788) 101 -dictGet test_01037.dict_array (33.8203,0.108512) 3501863 -dictGet test_01037.dict_array (33.8231,-1.03449) 101 -dictGet test_01037.dict_array (33.8312,3.49458) 3501829 -dictGet test_01037.dict_array (33.8342,0.297518) 3501863 -dictGet test_01037.dict_array (33.8352,0.165872) 101 -dictGet test_01037.dict_array (33.8354,1.87277) 3497424 -dictGet test_01037.dict_array (33.8371,1.60103) 3497398 -dictGet test_01037.dict_array (33.8387,1.9968) 3497424 -dictGet test_01037.dict_array (33.8403,3.5805) 3501829 -dictGet test_01037.dict_array (33.8414,-0.703067) 101 -dictGet test_01037.dict_array (33.844,-0.179472) 101 -dictGet test_01037.dict_array (33.8468,3.40137) 3501829 -dictGet test_01037.dict_array (33.8509,4.15334) 101 -dictGet test_01037.dict_array (33.8539,2.38339) 3497439 -dictGet test_01037.dict_array (33.858,-1.3122500000000001) 101 -dictGet test_01037.dict_array (33.859,3.72626) 3501829 -dictGet test_01037.dict_array (33.8616,2.24433) 3497424 -dictGet test_01037.dict_array (33.8621,3.01035) 3497438 -dictGet test_01037.dict_array (33.8623,1.17559) 3498129 -dictGet test_01037.dict_array (33.8682,2.706) 3497425 -dictGet test_01037.dict_array (33.8684,0.189231) 3501863 -dictGet test_01037.dict_array (33.872,1.93574) 3497424 -dictGet test_01037.dict_array (33.8844,3.80404) 3501829 -dictGet test_01037.dict_array (33.8888,0.594884) 3498135 -dictGet test_01037.dict_array (33.8946,2.74161) 3497438 -dictGet test_01037.dict_array (33.9023,0.6239) 3498135 -dictGet test_01037.dict_array (33.9057,0.873222) 3498136 -dictGet test_01037.dict_array (33.9157,-1.26607) 101 -dictGet test_01037.dict_array (33.92,2.06848) 3497397 -dictGet test_01037.dict_array (33.9298,-0.00526229) 101 -dictGet test_01037.dict_array (33.932,3.07063) 3497438 -dictGet test_01037.dict_array (33.9322,0.629385) 3501864 -dictGet test_01037.dict_array (33.9367,-1.41955) 101 -dictGet test_01037.dict_array (33.937,1.42532) 3498173 -dictGet test_01037.dict_array (33.9375,1.1467100000000001) 3498159 -dictGet test_01037.dict_array (33.9434,-1.05739) 101 -dictGet test_01037.dict_array (33.9477,3.34809) 3501829 -dictGet test_01037.dict_array (33.95,2.21715) 3497397 -dictGet test_01037.dict_array (33.955799999999996,0.305176) 3501859 -dictGet test_01037.dict_array (33.9686,-0.28273) 101 -dictGet test_01037.dict_array (33.9703,4.1255) 3501829 -dictGet test_01037.dict_array (33.9707,3.08199) 3497438 -dictGet test_01037.dict_array (33.9754,1.06203) 3498159 -dictGet test_01037.dict_array (33.9757,3.72468) 3501829 -dictGet test_01037.dict_array (33.9775,-0.0440599) 101 -dictGet test_01037.dict_array (33.9777,-0.251484) 101 -dictGet test_01037.dict_array (33.9789,-0.339374) 101 -dictGet test_01037.dict_array (33.9849,2.54515) 3497425 -dictGet test_01037.dict_array (33.9885,-0.318557) 101 -dictGet test_01037.dict_array (33.9977,1.07175) 3498159 -dictGet test_01037.dict_array (33.9984,-0.700517) 101 -dictGet test_01037.dict_array (34.0149,3.53338) 3501829 -dictGet test_01037.dict_array (34.0173,3.39155) 3501829 -dictGet test_01037.dict_array (34.0317,3.9579) 3501829 -dictGet test_01037.dict_array (34.0369,3.83612) 3501829 -dictGet test_01037.dict_array (34.043,-0.0887221) 101 -dictGet test_01037.dict_array (34.0487,1.14252) 3498159 -dictGet test_01037.dict_array (34.052,1.74832) 3497397 -dictGet test_01037.dict_array (34.0711,-0.898071) 101 -dictGet test_01037.dict_array (34.0747,1.55057) 3498173 -dictGet test_01037.dict_array (34.0803,3.16763) 3497438 -dictGet test_01037.dict_array (34.0872,3.75555) 3501829 -dictGet test_01037.dict_array (34.0965,1.62038) 3498173 -dictGet test_01037.dict_array (34.0977,-0.412691) 101 -dictGet test_01037.dict_array (34.0986,0.0294206) 101 -dictGet test_01037.dict_array (34.1072,3.15823) 3497438 -dictGet test_01037.dict_array (34.1092,3.09599) 3497438 -dictGet test_01037.dict_array (34.1206,1.04637) 3498160 -dictGet test_01037.dict_array (34.1209,3.13826) 3497438 -dictGet test_01037.dict_array (34.1265,3.95881) 3501829 -dictGet test_01037.dict_array (34.1286,-0.539319) 101 -dictGet test_01037.dict_array (34.1358,3.67451) 3501829 -dictGet test_01037.dict_array (34.1428,0.136115) 101 -dictGet test_01037.dict_array (34.157,1.73522) 3497397 -dictGet test_01037.dict_array (34.1581,1.48001) 3498172 -dictGet test_01037.dict_array (34.1682,3.42373) 3501829 -dictGet test_01037.dict_array (34.1683,-1.26511) 101 -dictGet test_01037.dict_array (34.1684,4.20007) 101 -dictGet test_01037.dict_array (34.1854,3.32089) 3501829 -dictGet test_01037.dict_array (34.2022,0.749536) 3501864 -dictGet test_01037.dict_array (34.2044,3.04865) 3497438 -dictGet test_01037.dict_array (34.22,-0.500055) 101 -dictGet test_01037.dict_array (34.2249,0.743775) 3501864 -dictGet test_01037.dict_array (34.2254,1.34702) 3498172 -dictGet test_01037.dict_array (34.2355,-0.898843) 101 -dictGet test_01037.dict_array (34.2394,2.0203699999999998) 3497439 -dictGet test_01037.dict_array (34.2466,1.83785) 3498251 -dictGet test_01037.dict_array (34.247,4.09563) 101 -dictGet test_01037.dict_array (34.2508,2.61312) 3497439 -dictGet test_01037.dict_array (34.2517,1.69642) 3498251 -dictGet test_01037.dict_array (34.2564,4.13033) 101 -dictGet test_01037.dict_array (34.2574,4.18928) 101 -dictGet test_01037.dict_array (34.2614,-0.478719) 101 -dictGet test_01037.dict_array (34.2625,2.38088) 3497439 -dictGet test_01037.dict_array (34.2666,3.1503) 3501829 -dictGet test_01037.dict_array (34.271,4.02223) 101 -dictGet test_01037.dict_array (34.2727,0.514755) 101 -dictGet test_01037.dict_array (34.278,1.98929) 3497439 -dictGet test_01037.dict_array (34.2798,-0.199208) 101 -dictGet test_01037.dict_array (34.2804,2.05184) 3497439 -dictGet test_01037.dict_array (34.2945,-1.11051) 101 -dictGet test_01037.dict_array (34.3168,-0.0829721) 101 -dictGet test_01037.dict_array (34.3345,3.4358) 3501829 -dictGet test_01037.dict_array (34.3377,1.13527) 3498162 -dictGet test_01037.dict_array (34.3383,1.27891) 3498161 -dictGet test_01037.dict_array (34.3391,1.47945) 3498161 -dictGet test_01037.dict_array (34.3441,0.627014) 101 -dictGet test_01037.dict_array (34.347,2.4853) 3497439 -dictGet test_01037.dict_array (34.3514,2.16247) 3497439 -dictGet test_01037.dict_array (34.3627,2.64533) 3497439 -dictGet test_01037.dict_array (34.3682,-0.227501) 101 -dictGet test_01037.dict_array (34.3756,4.21248) 101 -dictGet test_01037.dict_array (34.379,3.96604) 101 -dictGet test_01037.dict_array (34.3827,1.7518) 3498251 -dictGet test_01037.dict_array (34.3912,2.8834) 3501830 -dictGet test_01037.dict_array (34.3919,0.668829) 101 -dictGet test_01037.dict_array (34.3949,2.00338) 3497439 -dictGet test_01037.dict_array (34.3987,0.557268) 101 -dictGet test_01037.dict_array (34.4111,0.768558) 101 -dictGet test_01037.dict_array (34.4119,2.8742) 3501830 -dictGet test_01037.dict_array (34.416,3.50841) 3501829 -dictGet test_01037.dict_array (34.4212,1.24916) 3498161 -dictGet test_01037.dict_array (34.4251,0.457029) 101 -dictGet test_01037.dict_array (34.4274,-0.902559) 101 -dictGet test_01037.dict_array (34.4325,4.03159) 101 -dictGet test_01037.dict_array (34.438,1.63994) 3498251 -dictGet test_01037.dict_array (34.4403,-0.177594) 101 -dictGet test_01037.dict_array (34.4421,0.726712) 101 -dictGet test_01037.dict_array (34.4517,2.98611) 3501830 -dictGet test_01037.dict_array (34.4658,-1.312) 101 -dictGet test_01037.dict_array (34.4732,-0.0681338) 101 -dictGet test_01037.dict_array (34.4752,2.81646) 3501830 -dictGet test_01037.dict_array (34.4914,2.3858) 3497439 -dictGet test_01037.dict_array (34.4923,0.855231) 101 -dictGet test_01037.dict_array (34.5235,1.78468) 3498251 -dictGet test_01037.dict_array (34.5305,4.10608) 101 -dictGet test_01037.dict_array (34.5389,0.621937) 101 -dictGet test_01037.dict_array (34.5406,3.17145) 101 -dictGet test_01037.dict_array (34.5434,-0.56306) 101 -dictGet test_01037.dict_array (34.5449,3.13311) 3501829 -dictGet test_01037.dict_array (34.5491,2.31572) 3497439 -dictGet test_01037.dict_array (34.5539,2.94028) 3501830 -dictGet test_01037.dict_array (34.5546,-0.208825) 101 -dictGet test_01037.dict_array (34.5549,3.78486) 101 -dictGet test_01037.dict_array (34.5676,0.307148) 101 -dictGet test_01037.dict_array (34.5743,1.5217399999999999) 3501838 -dictGet test_01037.dict_array (34.5775,3.48046) 101 -dictGet test_01037.dict_array (34.5815,2.5243700000000002) 3501830 -dictGet test_01037.dict_array (34.5841,4.21191) 101 -dictGet test_01037.dict_array (34.5887,2.65083) 3501830 -dictGet test_01037.dict_array (34.5937,3.2143) 101 -dictGet test_01037.dict_array (34.6013,-1.0612) 101 -dictGet test_01037.dict_array (34.6089,1.36066) 3501838 -dictGet test_01037.dict_array (34.6103,3.40227) 101 -dictGet test_01037.dict_array (34.6128,1.92276) 3498251 -dictGet test_01037.dict_array (34.6175,2.43627) 3498251 -dictGet test_01037.dict_array (34.6209,3.43776) 101 -dictGet test_01037.dict_array (34.6234,2.60237) 3501830 -dictGet test_01037.dict_array (34.6275,3.52479) 101 -dictGet test_01037.dict_array (34.635,0.568558) 101 -dictGet test_01037.dict_array (34.6373,2.37692) 3498251 -dictGet test_01037.dict_array (34.6375,3.52234) 101 -dictGet test_01037.dict_array (34.6426,2.12397) 3498251 -dictGet test_01037.dict_array (34.6513,2.80915) 3501830 -dictGet test_01037.dict_array (34.6632,2.30039) 3498251 -dictGet test_01037.dict_array (34.6691,1.86582) 3498251 -dictGet test_01037.dict_array (34.6739,0.15342) 101 -dictGet test_01037.dict_array (34.6825,0.0499679) 101 -dictGet test_01037.dict_array (34.6893,0.454326) 101 -dictGet test_01037.dict_array (34.6957,-0.358598) 101 -dictGet test_01037.dict_array (34.6986,0.562679) 101 -dictGet test_01037.dict_array (34.712,1.12114) 101 -dictGet test_01037.dict_array (34.7126,-0.0057301) 101 -dictGet test_01037.dict_array (34.7137,0.0248501) 101 -dictGet test_01037.dict_array (34.7162,1.15623) 101 -dictGet test_01037.dict_array (34.7258,3.95142) 101 -dictGet test_01037.dict_array (34.7347,3.5232099999999997) 101 -dictGet test_01037.dict_array (34.7363,2.23374) 3501830 -dictGet test_01037.dict_array (34.7375,0.397841) 101 -dictGet test_01037.dict_array (34.7423,3.09198) 101 -dictGet test_01037.dict_array (34.7452,3.09029) 101 -dictGet test_01037.dict_array (34.7539,-1.06943) 101 -dictGet test_01037.dict_array (34.7733,-0.00912717) 101 -dictGet test_01037.dict_array (34.774,2.71088) 3501830 -dictGet test_01037.dict_array (34.7771,1.46009) 3501835 -dictGet test_01037.dict_array (34.7782,-1.28308) 101 -dictGet test_01037.dict_array (34.7924,3.63564) 101 -dictGet test_01037.dict_array (34.7939,-0.416676) 101 -dictGet test_01037.dict_array (34.7964,-0.401773) 101 -dictGet test_01037.dict_array (34.7974,0.0286873) 101 -dictGet test_01037.dict_array (34.7975,3.05965) 101 -dictGet test_01037.dict_array (34.8037,3.07263) 101 -dictGet test_01037.dict_array (34.8254,-0.390284) 101 -dictGet test_01037.dict_array (34.828,1.91869) 3498251 -dictGet test_01037.dict_array (34.8289,3.71058) 101 -dictGet test_01037.dict_array (34.8403,2.14606) 3501835 -dictGet test_01037.dict_array (34.8437,2.20617) 3501830 -dictGet test_01037.dict_array (34.8469,2.38435) 3501830 -dictGet test_01037.dict_array (34.86,1.45705) 101 -dictGet test_01037.dict_array (34.8612,0.914248) 101 -dictGet test_01037.dict_array (34.8663,3.4215400000000002) 101 -dictGet test_01037.dict_array (34.8724,-0.375144) 101 -dictGet test_01037.dict_array (34.8795,3.29317) 101 -dictGet test_01037.dict_array (34.8823,1.21988) 101 -dictGet test_01037.dict_array (34.8834,1.07657) 101 -dictGet test_01037.dict_array (34.8837,0.157648) 101 -dictGet test_01037.dict_array (34.8871,-0.9755) 101 -dictGet test_01037.dict_array (34.8871,1.8943699999999999) 3501835 -dictGet test_01037.dict_array (34.889,3.36756) 101 -dictGet test_01037.dict_array (34.8907,1.24874) 101 -dictGet test_01037.dict_array (34.8965,3.13508) 101 -dictGet test_01037.dict_array (34.9042,2.62092) 101 -dictGet test_01037.dict_array (34.9055,-0.0448967) 101 -dictGet test_01037.dict_array (34.9122,0.110576) 101 -dictGet test_01037.dict_array (34.9228,3.60183) 101 -dictGet test_01037.dict_array (34.9237,1.21715) 101 -dictGet test_01037.dict_array (34.9296,1.70459) 3501835 -dictGet test_01037.dict_array (34.941,-1.14663) 101 -dictGet test_01037.dict_array (34.9448,1.18923) 101 -dictGet test_01037.dict_array (34.9462,3.81678) 101 -dictGet test_01037.dict_array (34.9466,0.593463) 101 -dictGet test_01037.dict_array (34.9485,0.150307) 101 -dictGet test_01037.dict_array (34.9542,0.487238) 101 -dictGet test_01037.dict_array (34.9559,2.03473) 3501835 -dictGet test_01037.dict_array (34.9671,-0.960225) 101 -dictGet test_01037.dict_array (34.9711,2.63444) 101 -dictGet test_01037.dict_array (34.9892,0.354775) 101 -dictGet test_01037.dict_array (34.9907,1.40724) 101 -dictGet test_01037.dict_array (34.9916,-0.00173097) 101 -dictGet test_01037.dict_array (34.9919,2.06167) 101 +dictGet dict_array (29.5699,2.50068) 101 +dictGet dict_array (29.5796,1.55456) 101 +dictGet dict_array (29.5796,2.36864) 101 +dictGet dict_array (29.5844,1.59626) 101 +dictGet dict_array (29.5886,4.03321) 101 +dictGet dict_array (29.5914,3.02628) 101 +dictGet dict_array (29.5926,-0.0965169) 101 +dictGet dict_array (29.5968,2.37773) 101 +dictGet dict_array (29.5984,0.755853) 101 +dictGet dict_array (29.6066,3.47173) 101 +dictGet dict_array (29.6085,-1.26007) 6489978 +dictGet dict_array (29.6131,0.246565) 101 +dictGet dict_array (29.6157,-0.266687) 101 +dictGet dict_array (29.6164,2.94674) 101 +dictGet dict_array (29.6195,-0.591941) 101 +dictGet dict_array (29.6231,1.54818) 101 +dictGet dict_array (29.6379,0.764114) 101 +dictGet dict_array (29.6462,-0.772059) 934530 +dictGet dict_array (29.6579,-1.07336) 6489978 +dictGet dict_array (29.6618,-0.271842) 101 +dictGet dict_array (29.6629,-0.303602) 101 +dictGet dict_array (29.6659,-0.782823) 934530 +dictGet dict_array (29.6736,-0.113832) 101 +dictGet dict_array (29.6759,3.02905) 101 +dictGet dict_array (29.6778,3.71898) 101 +dictGet dict_array (29.6796,1.10433) 101 +dictGet dict_array (29.6809,2.13677) 101 +dictGet dict_array (29.6935,4.11894) 101 +dictGet dict_array (29.6991,-1.4458199999999999) 101 +dictGet dict_array (29.6997,3.17297) 101 +dictGet dict_array (29.7043,3.6145899999999997) 101 +dictGet dict_array (29.7065,3.24885) 101 +dictGet dict_array (29.7126,0.28108) 101 +dictGet dict_array (29.7192,0.174273) 101 +dictGet dict_array (29.7217,-0.523481) 3501900 +dictGet dict_array (29.7271,1.67967) 101 +dictGet dict_array (29.7311,4.12444) 101 +dictGet dict_array (29.7347,1.88378) 101 +dictGet dict_array (29.7358,0.67944) 101 +dictGet dict_array (29.7366,-0.2973) 101 +dictGet dict_array (29.7446,0.646536) 101 +dictGet dict_array (29.7453,-0.567963) 3501900 +dictGet dict_array (29.764,4.04217) 101 +dictGet dict_array (29.7655,1.51372) 101 +dictGet dict_array (29.7744,1.12435) 101 +dictGet dict_array (29.7774,-0.0681196) 3501895 +dictGet dict_array (29.7784,1.54864) 101 +dictGet dict_array (29.7785,2.24139) 101 +dictGet dict_array (29.7922,0.220808) 101 +dictGet dict_array (29.7936,2.37709) 101 +dictGet dict_array (29.8008,0.948536) 101 +dictGet dict_array (29.8115,0.201227) 101 +dictGet dict_array (29.814,0.149601) 3501895 +dictGet dict_array (29.8193,-1.35858) 101 +dictGet dict_array (29.8201,0.965518) 101 +dictGet dict_array (29.8265,-0.727286) 3501900 +dictGet dict_array (29.8277,-0.531746) 3501900 +dictGet dict_array (29.8289,3.63009) 101 +dictGet dict_array (29.8548,0.838047) 101 +dictGet dict_array (29.8641,-0.845265) 3501900 +dictGet dict_array (29.8649,0.0562212) 3501895 +dictGet dict_array (29.8701,-1.02045) 934530 +dictGet dict_array (29.8733,2.76654) 101 +dictGet dict_array (29.876,0.555475) 101 +dictGet dict_array (29.8794,-0.800108) 3501900 +dictGet dict_array (29.8813,2.7426399999999997) 101 +dictGet dict_array (29.897100000000002,2.66193) 101 +dictGet dict_array (29.908,4.01339) 101 +dictGet dict_array (29.9165,-1.08246) 3501894 +dictGet dict_array (29.9201,-0.420861) 3498054 +dictGet dict_array (29.9217,3.03778) 101 +dictGet dict_array (29.9355,0.773833) 101 +dictGet dict_array (29.947,3.76517) 101 +dictGet dict_array (29.9518,-0.60557) 3498056 +dictGet dict_array (29.9564,-0.600163) 3498056 +dictGet dict_array (29.959600000000002,4.16591) 101 +dictGet dict_array (29.9615,-1.33708) 3501894 +dictGet dict_array (29.9699,-0.392375) 3498054 +dictGet dict_array (29.9776,1.04552) 101 +dictGet dict_array (29.9784,4.02756) 101 +dictGet dict_array (29.9819,4.00597) 101 +dictGet dict_array (29.9826,1.2816100000000001) 101 +dictGet dict_array (30.0026,2.76257) 101 +dictGet dict_array (30.0126,3.68255) 101 +dictGet dict_array (30.0131,0.796576) 3501892 +dictGet dict_array (30.018,1.16523) 101 +dictGet dict_array (30.0261,-0.210653) 3501896 +dictGet dict_array (30.0472,-1.11007) 3501894 +dictGet dict_array (30.0542,-0.479585) 3498054 +dictGet dict_array (30.0613,1.6278000000000001) 101 +dictGet dict_array (30.0617,-0.0551152) 3501895 +dictGet dict_array (30.0637,2.62066) 101 +dictGet dict_array (30.0721,1.6424400000000001) 101 +dictGet dict_array (30.0769,-0.402636) 3498054 +dictGet dict_array (30.0791,-0.277435) 3501896 +dictGet dict_array (30.0931,0.0327512) 3501895 +dictGet dict_array (30.1059,3.52623) 101 +dictGet dict_array (30.1103,0.865466) 3501892 +dictGet dict_array (30.1115,2.95243) 101 +dictGet dict_array (30.1144,1.71029) 101 +dictGet dict_array (30.1311,-0.864751) 3501899 +dictGet dict_array (30.1336,-0.851386) 3501899 +dictGet dict_array (30.1393,3.89901) 101 +dictGet dict_array (30.1456,-0.531898) 3498054 +dictGet dict_array (30.1492,2.07833) 101 +dictGet dict_array (30.1575,2.43856) 101 +dictGet dict_array (30.1682,1.19771) 101 +dictGet dict_array (30.1716,3.9853300000000003) 101 +dictGet dict_array (30.1849,2.78374) 101 +dictGet dict_array (30.1866,0.65658) 3498021 +dictGet dict_array (30.1885,1.56943) 101 +dictGet dict_array (30.1959,-1.38202) 101 +dictGet dict_array (30.1999,1.58413) 101 +dictGet dict_array (30.2024,0.713081) 3498021 +dictGet dict_array (30.2054,0.620143) 3498021 +dictGet dict_array (30.2091,1.51641) 101 +dictGet dict_array (30.2124,-0.331782) 3498031 +dictGet dict_array (30.226,3.03527) 101 +dictGet dict_array (30.2261,3.18486) 101 +dictGet dict_array (30.2288,2.48407) 101 +dictGet dict_array (30.2345,3.7462400000000002) 101 +dictGet dict_array (30.2375,0.62046) 3498021 +dictGet dict_array (30.2425,-0.472914) 3498054 +dictGet dict_array (30.247,3.95863) 101 +dictGet dict_array (30.2494,-0.305093) 3498031 +dictGet dict_array (30.2499,2.54337) 101 +dictGet dict_array (30.2606,2.16644) 101 +dictGet dict_array (30.2672,3.94847) 101 +dictGet dict_array (30.2709,-0.136264) 6088794 +dictGet dict_array (30.2764,1.18654) 101 +dictGet dict_array (30.2765,1.20383) 101 +dictGet dict_array (30.2839,1.05762) 3498024 +dictGet dict_array (30.286,0.469327) 3498021 +dictGet dict_array (30.2927,3.1693) 101 +dictGet dict_array (30.2935,3.49854) 101 +dictGet dict_array (30.307,0.312338) 3498021 +dictGet dict_array (30.3085,1.07791) 3498024 +dictGet dict_array (30.3139,2.77248) 101 +dictGet dict_array (30.314,0.822823) 3498024 +dictGet dict_array (30.3227,-0.587351) 3498055 +dictGet dict_array (30.332,1.00174) 3498024 +dictGet dict_array (30.3388,0.844148) 3498024 +dictGet dict_array (30.3485,0.561902) 3498021 +dictGet dict_array (30.3497,0.180362) 6489998 +dictGet dict_array (30.361,4.13016) 101 +dictGet dict_array (30.3623,-0.0484027) 6489998 +dictGet dict_array (30.3638,3.9845800000000002) 101 +dictGet dict_array (30.3853,3.16051) 101 +dictGet dict_array (30.3974,2.6617800000000003) 101 +dictGet dict_array (30.4002,-1.15886) 101 +dictGet dict_array (30.4008,-0.387015) 3498031 +dictGet dict_array (30.4018,1.86493) 101 +dictGet dict_array (30.4239,1.16818) 3498024 +dictGet dict_array (30.4363,3.63938) 101 +dictGet dict_array (30.4377,-0.81315) 3498063 +dictGet dict_array (30.4391,3.54703) 101 +dictGet dict_array (30.4424,-1.39435) 101 +dictGet dict_array (30.4441,2.8463000000000003) 101 +dictGet dict_array (30.4517,3.28117) 101 +dictGet dict_array (30.4658,2.6928) 101 +dictGet dict_array (30.4734,2.66161) 101 +dictGet dict_array (30.4799,-1.07578) 101 +dictGet dict_array (30.4837,-1.02486) 3501899 +dictGet dict_array (30.485,1.06326) 3498024 +dictGet dict_array (30.495,1.12306) 101 +dictGet dict_array (30.501,2.27264) 101 +dictGet dict_array (30.5027,1.99382) 101 +dictGet dict_array (30.5194,-1.03943) 3501893 +dictGet dict_array (30.5239,1.04328) 101 +dictGet dict_array (30.528,3.82041) 101 +dictGet dict_array (30.5299,-0.715248) 3498063 +dictGet dict_array (30.5331,1.19603) 101 +dictGet dict_array (30.535800000000002,2.71485) 101 +dictGet dict_array (30.5405,0.804694) 3498023 +dictGet dict_array (30.542,1.23739) 101 +dictGet dict_array (30.5432,4.04189) 101 +dictGet dict_array (30.5457,-0.956121) 3501893 +dictGet dict_array (30.5506,3.07443) 101 +dictGet dict_array (30.5539,3.87084) 101 +dictGet dict_array (30.5578,3.78837) 101 +dictGet dict_array (30.5588,0.966135) 3498022 +dictGet dict_array (30.5637,2.5605) 101 +dictGet dict_array (30.5647,-1.27328) 101 +dictGet dict_array (30.5656,-0.0581332) 6088794 +dictGet dict_array (30.5715,0.65755) 3498023 +dictGet dict_array (30.5727,3.01604) 101 +dictGet dict_array (30.5729,-0.976857) 3501893 +dictGet dict_array (30.5751,0.60204) 3498023 +dictGet dict_array (30.5854,3.02473) 101 +dictGet dict_array (30.5866,0.174099) 6489998 +dictGet dict_array (30.5947,0.875193) 3498023 +dictGet dict_array (30.5992,-0.403901) 3498063 +dictGet dict_array (30.6002,4.18891) 101 +dictGet dict_array (30.6025,0.217712) 6489998 +dictGet dict_array (30.6054,0.927203) 3498022 +dictGet dict_array (30.6075,3.79359) 101 +dictGet dict_array (30.6159,3.82773) 101 +dictGet dict_array (30.627,3.84039) 101 +dictGet dict_array (30.6308,0.77517) 3498023 +dictGet dict_array (30.6338,0.179565) 6489998 +dictGet dict_array (30.6461,1.3293599999999999) 101 +dictGet dict_array (30.6674,-0.424547) 3498063 +dictGet dict_array (30.669,1.76539) 101 +dictGet dict_array (30.6788,4.01239) 101 +dictGet dict_array (30.6864,3.59158) 101 +dictGet dict_array (30.7049,-0.875413) 3501893 +dictGet dict_array (30.705,1.3307) 101 +dictGet dict_array (30.7063,-0.473192) 3498063 +dictGet dict_array (30.7075,-1.1958199999999999) 101 +dictGet dict_array (30.7101,-0.367562) 3498012 +dictGet dict_array (30.7203,2.98725) 101 +dictGet dict_array (30.7213,2.2745699999999998) 101 +dictGet dict_array (30.7446,-0.334144) 3498012 +dictGet dict_array (30.7468,3.82967) 101 +dictGet dict_array (30.747,-0.384779) 3498012 +dictGet dict_array (30.7681,0.904198) 3498022 +dictGet dict_array (30.7757,1.78743) 101 +dictGet dict_array (30.8021,-0.479212) 3498012 +dictGet dict_array (30.8079,-1.40869) 101 +dictGet dict_array (30.8206,-0.0608489) 3498012 +dictGet dict_array (30.8218,0.43909) 3498023 +dictGet dict_array (30.8239,0.10014) 3498012 +dictGet dict_array (30.8282,4.15409) 101 +dictGet dict_array (30.8288,-0.709528) 3501893 +dictGet dict_array (30.8326,0.156011) 3498012 +dictGet dict_array (30.8328,-1.03704) 101 +dictGet dict_array (30.839,2.15528) 101 +dictGet dict_array (30.8452,0.219377) 3498013 +dictGet dict_array (30.8463,0.0515355) 3498012 +dictGet dict_array (30.8526,2.06614) 101 +dictGet dict_array (30.8566,0.517876) 3498023 +dictGet dict_array (30.8588,-1.31738) 101 +dictGet dict_array (30.8681,0.44207) 3498013 +dictGet dict_array (30.8914,1.0072) 3498022 +dictGet dict_array (30.897,0.483425) 3498013 +dictGet dict_array (30.905,2.8731999999999998) 3501793 +dictGet dict_array (30.9051,2.21956) 101 +dictGet dict_array (30.9115,4.00663) 101 +dictGet dict_array (30.9167,-0.834462) 3501893 +dictGet dict_array (30.9252,-1.3289900000000001) 101 +dictGet dict_array (30.9314,1.85384) 101 +dictGet dict_array (30.9392,2.53236) 3501827 +dictGet dict_array (30.9569,2.82038) 3501793 +dictGet dict_array (30.9598,-0.641011) 3498012 +dictGet dict_array (30.9601,-0.254928) 3498012 +dictGet dict_array (30.9623,-1.3886) 101 +dictGet dict_array (30.9707,0.888854) 3498022 +dictGet dict_array (30.9766,2.81957) 3501793 +dictGet dict_array (30.9775,2.69273) 3501793 +dictGet dict_array (30.9821,0.587715) 3498013 +dictGet dict_array (30.9887,4.0233) 101 +dictGet dict_array (30.9914,0.259542) 3498013 +dictGet dict_array (30.9986,-1.36832) 101 +dictGet dict_array (31.008,0.628999) 3498013 +dictGet dict_array (31.0168,-1.17462) 101 +dictGet dict_array (31.0237,3.52547) 3501821 +dictGet dict_array (31.0306,3.78522) 101 +dictGet dict_array (31.0308,-0.72453) 3501893 +dictGet dict_array (31.0463,2.41997) 3501825 +dictGet dict_array (31.047,0.624184) 3498013 +dictGet dict_array (31.0569,0.0706393) 3498015 +dictGet dict_array (31.0583,1.3244099999999999) 3501926 +dictGet dict_array (31.063,3.23861) 3501793 +dictGet dict_array (31.068,0.695575) 3498022 +dictGet dict_array (31.0687,1.85675) 101 +dictGet dict_array (31.0692,0.254793) 3498014 +dictGet dict_array (31.0766,0.828128) 3498022 +dictGet dict_array (31.0833,0.0612782) 3498015 +dictGet dict_array (31.0833,2.59748) 3501793 +dictGet dict_array (31.0861,-1.3778299999999999) 101 +dictGet dict_array (31.0874,3.07258) 3501793 +dictGet dict_array (31.0882,1.4882) 3501926 +dictGet dict_array (31.0924,3.42242) 3501821 +dictGet dict_array (31.0927,2.67448) 3501793 +dictGet dict_array (31.0936,1.12292) 3498022 +dictGet dict_array (31.0952,-0.336928) 3498012 +dictGet dict_array (31.0978,3.48482) 3501826 +dictGet dict_array (31.1107,3.7513199999999998) 3501826 +dictGet dict_array (31.1156,1.19171) 3501926 +dictGet dict_array (31.1176,0.223509) 3498015 +dictGet dict_array (31.1249,0.946838) 3498022 +dictGet dict_array (31.1267,1.48983) 3501926 +dictGet dict_array (31.138,-0.289981) 3501898 +dictGet dict_array (31.1382,3.02904) 3501793 +dictGet dict_array (31.1475,2.6178) 3501793 +dictGet dict_array (31.1491,1.37873) 3501926 +dictGet dict_array (31.1525,3.72105) 3501826 +dictGet dict_array (31.1526,-1.4129800000000001) 101 +dictGet dict_array (31.1526,-0.186457) 3501898 +dictGet dict_array (31.1539,2.78789) 3501793 +dictGet dict_array (31.1548,-1.08552) 101 +dictGet dict_array (31.1567,-0.0768925) 3501898 +dictGet dict_array (31.1613,1.49617) 3501926 +dictGet dict_array (31.1653,1.03777) 3498022 +dictGet dict_array (31.1662,3.4214700000000002) 3501826 +dictGet dict_array (31.1672,-0.0813169) 3501898 +dictGet dict_array (31.177,0.440843) 3498014 +dictGet dict_array (31.1788,-0.737151) 3501893 +dictGet dict_array (31.1856,-0.144396) 3501898 +dictGet dict_array (31.1959,3.66813) 3501826 +dictGet dict_array (31.1996,-0.353983) 3501898 +dictGet dict_array (31.2019,2.86802) 3501793 +dictGet dict_array (31.2087,2.31245) 3501825 +dictGet dict_array (31.2125,3.2713200000000002) 3501793 +dictGet dict_array (31.2137,-0.108129) 3501898 +dictGet dict_array (31.216,3.9156) 101 +dictGet dict_array (31.2201,-0.202141) 3501898 +dictGet dict_array (31.2285,2.09058) 101 +dictGet dict_array (31.2502,4.01526) 101 +dictGet dict_array (31.2585,3.11524) 3501793 +dictGet dict_array (31.2645,-0.620418) 3501890 +dictGet dict_array (31.2684,2.74277) 3501793 +dictGet dict_array (31.2821,-1.12772) 101 +dictGet dict_array (31.2821,2.46769) 3501825 +dictGet dict_array (31.2887,3.91396) 101 +dictGet dict_array (31.295,1.49942) 3501926 +dictGet dict_array (31.2997,3.46122) 3501826 +dictGet dict_array (31.3017,3.3263) 3501826 +dictGet dict_array (31.3022,3.16754) 3501793 +dictGet dict_array (31.3048,0.364962) 3498014 +dictGet dict_array (31.305,3.1967) 3501793 +dictGet dict_array (31.3061,1.84303) 101 +dictGet dict_array (31.3082,-0.173851) 3501898 +dictGet dict_array (31.3315,3.90932) 101 +dictGet dict_array (31.3351,2.80164) 3501793 +dictGet dict_array (31.3388,0.168765) 3498015 +dictGet dict_array (31.339,0.25535) 3498094 +dictGet dict_array (31.3423,1.7036799999999999) 3501926 +dictGet dict_array (31.349,0.386456) 3498014 +dictGet dict_array (31.3558,-1.04336) 101 +dictGet dict_array (31.3564,0.478876) 3498014 +dictGet dict_array (31.3607,-0.0860507) 3498015 +dictGet dict_array (31.3831,3.84469) 101 +dictGet dict_array (31.3886,-0.731137) 3501890 +dictGet dict_array (31.4043,-0.348907) 5457271 +dictGet dict_array (31.4081,1.47391) 3501926 +dictGet dict_array (31.4176,-0.583645) 5457271 +dictGet dict_array (31.4177,1.36972) 3501926 +dictGet dict_array (31.4182,0.958303) 3498022 +dictGet dict_array (31.4199,3.1738) 3501793 +dictGet dict_array (31.4221,2.74876) 3501825 +dictGet dict_array (31.4301,-0.122643) 3498015 +dictGet dict_array (31.4344,1.00661) 3498022 +dictGet dict_array (31.4375,4.20304) 101 +dictGet dict_array (31.4377,0.289608) 3498094 +dictGet dict_array (31.4379,0.54744) 3498014 +dictGet dict_array (31.4459,3.94945) 101 +dictGet dict_array (31.4559,-0.345063) 5457271 +dictGet dict_array (31.464,0.726129) 3498014 +dictGet dict_array (31.4662,-0.299019) 3498015 +dictGet dict_array (31.4671,1.9605299999999999) 3501794 +dictGet dict_array (31.4673,-0.403676) 5457271 +dictGet dict_array (31.4712,-0.237941) 3498015 +dictGet dict_array (31.4816,0.120264) 3498015 +dictGet dict_array (31.4875,0.323483) 3498014 +dictGet dict_array (31.490099999999998,-0.338163) 5457271 +dictGet dict_array (31.4932,0.517674) 3498014 +dictGet dict_array (31.5112,1.9689299999999998) 3501794 +dictGet dict_array (31.5122,2.92785) 3501791 +dictGet dict_array (31.5151,0.166429) 3498094 +dictGet dict_array (31.5174,2.94802) 3501791 +dictGet dict_array (31.5182,4.18776) 101 +dictGet dict_array (31.5238,1.18793) 3498003 +dictGet dict_array (31.5271,3.07446) 3501791 +dictGet dict_array (31.5393,1.58061) 3501794 +dictGet dict_array (31.5421,3.13711) 3501791 +dictGet dict_array (31.5479,2.39897) 3497970 +dictGet dict_array (31.5519,0.99285) 3498003 +dictGet dict_array (31.5685,3.47987) 3501824 +dictGet dict_array (31.5959,0.437382) 3498014 +dictGet dict_array (31.6003,0.194376) 3498094 +dictGet dict_array (31.6026,2.15457) 3501794 +dictGet dict_array (31.606,2.45365) 3497970 +dictGet dict_array (31.6062,-0.453441) 3501890 +dictGet dict_array (31.6107,1.35247) 3497974 +dictGet dict_array (31.6155,3.85588) 101 +dictGet dict_array (31.6222,2.03326) 3501794 +dictGet dict_array (31.6231,-0.123059) 3498083 +dictGet dict_array (31.6244,1.6885599999999998) 3497974 +dictGet dict_array (31.6459,0.669716) 3498014 +dictGet dict_array (31.6563,-0.0644741) 3498083 +dictGet dict_array (31.6618,-0.551121) 3501890 +dictGet dict_array (31.6725,-0.38922) 3498085 +dictGet dict_array (31.6727,4.10336) 101 +dictGet dict_array (31.6739,4.1391) 101 +dictGet dict_array (31.6897,2.8694699999999997) 3501792 +dictGet dict_array (31.6902,3.98792) 101 +dictGet dict_array (31.6945,2.46687) 3497970 +dictGet dict_array (31.6987,-1.3796) 101 +dictGet dict_array (31.7012,2.34845) 3497970 +dictGet dict_array (31.7036,0.0228348) 3501888 +dictGet dict_array (31.7046,3.68111) 3501824 +dictGet dict_array (31.7055,2.92556) 3501792 +dictGet dict_array (31.7102,1.04532) 3498003 +dictGet dict_array (31.7149,-0.443302) 3498085 +dictGet dict_array (31.7195,2.99311) 3501791 +dictGet dict_array (31.7274,0.166719) 3498094 +dictGet dict_array (31.7565,-0.565382) 3498085 +dictGet dict_array (31.7615,0.771626) 3498014 +dictGet dict_array (31.7739,1.8970099999999999) 3497974 +dictGet dict_array (31.7848,1.2623199999999999) 3498003 +dictGet dict_array (31.7912,-0.788599) 101 +dictGet dict_array (31.8011,2.65853) 3497970 +dictGet dict_array (31.8032,-0.0590108) 3501888 +dictGet dict_array (31.8038,1.9618799999999998) 3497974 +dictGet dict_array (31.8098,-1.46851) 101 +dictGet dict_array (31.8131,3.41982) 3501791 +dictGet dict_array (31.8169,3.31059) 3501791 +dictGet dict_array (31.8202,-0.193692) 3501888 +dictGet dict_array (31.8306,1.57586) 3497974 +dictGet dict_array (31.8382,-0.787948) 101 +dictGet dict_array (31.8433,2.49692) 3497970 +dictGet dict_array (31.8436,2.41851) 3497970 +dictGet dict_array (31.8563,-1.10787) 101 +dictGet dict_array (31.8683,0.996504) 3498002 +dictGet dict_array (31.8693,-0.828142) 101 +dictGet dict_array (31.8723,1.08929) 3498003 +dictGet dict_array (31.8737,0.881127) 3498002 +dictGet dict_array (31.8881,-0.58441) 101 +dictGet dict_array (31.9011,0.121349) 3498094 +dictGet dict_array (31.9066,2.13045) 3497965 +dictGet dict_array (31.9142,1.03368) 3498002 +dictGet dict_array (31.9155,3.38363) 3501791 +dictGet dict_array (31.9168,1.3166) 3498004 +dictGet dict_array (31.9185,-1.11879) 101 +dictGet dict_array (31.9186,-0.647948) 101 +dictGet dict_array (31.9311,3.96928) 101 +dictGet dict_array (31.9335,1.47048) 3497974 +dictGet dict_array (31.9443,-1.36175) 101 +dictGet dict_array (31.9481,2.34231) 3497970 +dictGet dict_array (31.9526,1.36565) 3498004 +dictGet dict_array (31.9629,2.5208399999999997) 3497970 +dictGet dict_array (31.9765,0.975783) 3498002 +dictGet dict_array (31.9923,3.31773) 3501791 +dictGet dict_array (31.9994,0.972816) 3498002 +dictGet dict_array (32.001,3.47425) 3501791 +dictGet dict_array (32.0127,2.13874) 3497965 +dictGet dict_array (32.0244,3.2092) 3501792 +dictGet dict_array (32.029,1.18039) 3498004 +dictGet dict_array (32.0315,0.566073) 3498095 +dictGet dict_array (32.0354,1.0766499999999999) 3498004 +dictGet dict_array (32.0399,-1.11576) 101 +dictGet dict_array (32.053,2.16849) 3497965 +dictGet dict_array (32.0542,0.042328) 3498096 +dictGet dict_array (32.0576,2.47001) 3497970 +dictGet dict_array (32.061,3.7498899999999997) 101 +dictGet dict_array (32.0623,1.25134) 3498004 +dictGet dict_array (32.0626,1.9611399999999999) 3497965 +dictGet dict_array (32.0666,-0.0904247) 3498096 +dictGet dict_array (32.0681,2.28442) 3497970 +dictGet dict_array (32.0692,1.50869) 3497981 +dictGet dict_array (32.0724,4.03314) 101 +dictGet dict_array (32.0729,-0.064324) 101 +dictGet dict_array (32.079,0.293758) 3498094 +dictGet dict_array (32.0847,-1.19814) 101 +dictGet dict_array (32.0974,-0.91927) 101 +dictGet dict_array (32.0979,-0.736979) 101 +dictGet dict_array (32.106,-1.33063) 101 +dictGet dict_array (32.1189,0.246715) 3498094 +dictGet dict_array (32.1207,4.00883) 101 +dictGet dict_array (32.1396,1.12402) 3498004 +dictGet dict_array (32.1413,1.5668) 3497981 +dictGet dict_array (32.143,1.35559) 3498004 +dictGet dict_array (32.1538,1.32881) 3498004 +dictGet dict_array (32.1549,4.06552) 101 +dictGet dict_array (32.1555,-0.79275) 101 +dictGet dict_array (32.163,1.17733) 3498004 +dictGet dict_array (32.1634,2.94273) 3501792 +dictGet dict_array (32.1644,1.85666) 3497965 +dictGet dict_array (32.1745,0.435458) 3498095 +dictGet dict_array (32.1765,1.65149) 3497981 +dictGet dict_array (32.1893,2.08924) 3497965 +dictGet dict_array (32.2024,0.222191) 3498093 +dictGet dict_array (32.2107,1.34379) 3497981 +dictGet dict_array (32.2109,3.9018699999999997) 101 +dictGet dict_array (32.2123,1.85233) 3497965 +dictGet dict_array (32.2144,3.72534) 101 +dictGet dict_array (32.2218,2.5386699999999998) 3497970 +dictGet dict_array (32.2279,2.84267) 3497245 +dictGet dict_array (32.2345,3.33295) 3501792 +dictGet dict_array (32.2435,3.85283) 101 +dictGet dict_array (32.2527,-0.480608) 101 +dictGet dict_array (32.2566,-0.837882) 101 +dictGet dict_array (32.2627,2.57708) 3497970 +dictGet dict_array (32.2733,0.244931) 3498096 +dictGet dict_array (32.2761,4.05808) 101 +dictGet dict_array (32.2764,3.78472) 101 +dictGet dict_array (32.2814,-1.26011) 101 +dictGet dict_array (32.2861,3.02427) 3497245 +dictGet dict_array (32.2924,0.928609) 3498004 +dictGet dict_array (32.2963,-0.78543) 101 +dictGet dict_array (32.3039,3.21175) 3501792 +dictGet dict_array (32.3107,0.698287) 3498004 +dictGet dict_array (32.3138,0.0595677) 3498106 +dictGet dict_array (32.3339,0.707056) 3498004 +dictGet dict_array (32.3351,0.415474) 3498106 +dictGet dict_array (32.342,-0.681023) 101 +dictGet dict_array (32.3463,1.83196) 3497126 +dictGet dict_array (32.3494,2.43799) 3497114 +dictGet dict_array (32.3524,3.47049) 3501822 +dictGet dict_array (32.3531,2.33115) 3497114 +dictGet dict_array (32.3602,0.116106) 3498106 +dictGet dict_array (32.3612,1.1598) 3498004 +dictGet dict_array (32.3689,3.34847) 3501822 +dictGet dict_array (32.3695,0.734055) 3498004 +dictGet dict_array (32.3825,3.85017) 101 +dictGet dict_array (32.3835,-1.25491) 101 +dictGet dict_array (32.4018,-0.728568) 101 +dictGet dict_array (32.4044,2.96727) 3497245 +dictGet dict_array (32.4101,2.9988) 3497245 +dictGet dict_array (32.417,-1.12908) 101 +dictGet dict_array (32.4172,4.1952) 101 +dictGet dict_array (32.4239,2.49512) 3497245 +dictGet dict_array (32.4258,4.05137) 101 +dictGet dict_array (32.4264,-0.427357) 101 +dictGet dict_array (32.4274,3.59377) 3501822 +dictGet dict_array (32.4286,-1.24757) 101 +dictGet dict_array (32.4294,3.0665) 3497245 +dictGet dict_array (32.4333,-0.353347) 101 +dictGet dict_array (32.4391,3.64421) 3501822 +dictGet dict_array (32.4401,3.70635) 3501822 +dictGet dict_array (32.45,1.68918) 3497126 +dictGet dict_array (32.4507,-0.133471) 101 +dictGet dict_array (32.4592,0.976458) 3498105 +dictGet dict_array (32.4595,1.89135) 3497126 +dictGet dict_array (32.4604,0.280248) 3498106 +dictGet dict_array (32.4835,0.472731) 3498106 +dictGet dict_array (32.4855,2.01938) 3497126 +dictGet dict_array (32.4872,2.01697) 3497126 +dictGet dict_array (32.4911,0.613106) 3498105 +dictGet dict_array (32.4918,2.17834) 3497114 +dictGet dict_array (32.4947,2.34595) 3497114 +dictGet dict_array (32.5035,2.92234) 3497245 +dictGet dict_array (32.5132,-0.331206) 101 +dictGet dict_array (32.5156,-0.412604) 3501887 +dictGet dict_array (32.5158,2.9067499999999997) 3497245 +dictGet dict_array (32.5249,2.44519) 3497114 +dictGet dict_array (32.5293,-0.790952) 101 +dictGet dict_array (32.5319,3.96854) 101 +dictGet dict_array (32.5518,3.6093) 3501822 +dictGet dict_array (32.5541,3.5225400000000002) 3501822 +dictGet dict_array (32.5569,0.816123) 3498105 +dictGet dict_array (32.5646,1.9775) 3497126 +dictGet dict_array (32.5733,3.81271) 101 +dictGet dict_array (32.5767,0.948327) 3498105 +dictGet dict_array (32.5971,1.76179) 3497126 +dictGet dict_array (32.6035,-0.716157) 101 +dictGet dict_array (32.6087,4.21614) 101 +dictGet dict_array (32.6171,0.024481) 101 +dictGet dict_array (32.6189,-0.775391) 101 +dictGet dict_array (32.6198,2.92081) 3497167 +dictGet dict_array (32.621,-0.970784) 101 +dictGet dict_array (32.6266,0.650009) 3498105 +dictGet dict_array (32.6315,2.15144) 3497126 +dictGet dict_array (32.6385,-0.436803) 101 +dictGet dict_array (32.6449,-0.191292) 101 +dictGet dict_array (32.6535,2.10385) 3497126 +dictGet dict_array (32.6592,3.49973) 3501822 +dictGet dict_array (32.6598,2.5980600000000003) 3497114 +dictGet dict_array (32.6612,2.95681) 3497167 +dictGet dict_array (32.6636,-0.57235) 101 +dictGet dict_array (32.669,-0.382702) 101 +dictGet dict_array (32.6752,1.30748) 3497981 +dictGet dict_array (32.6811,2.9559800000000003) 3497167 +dictGet dict_array (32.6821,0.57336) 3498105 +dictGet dict_array (32.6828,3.91304) 101 +dictGet dict_array (32.6979,3.96868) 101 +dictGet dict_array (32.6983,3.15784) 3497167 +dictGet dict_array (32.7122,0.794293) 3498105 +dictGet dict_array (32.7131,-0.847256) 101 +dictGet dict_array (32.7219,0.883461) 3498105 +dictGet dict_array (32.7228,1.78808) 3497126 +dictGet dict_array (32.7273,-0.206908) 101 +dictGet dict_array (32.7292,0.259331) 3501889 +dictGet dict_array (32.7304,-1.38317) 101 +dictGet dict_array (32.7353,1.01601) 3498105 +dictGet dict_array (32.7354,4.17574) 101 +dictGet dict_array (32.7357,-0.190194) 101 +dictGet dict_array (32.7465,-1.37598) 101 +dictGet dict_array (32.7494,-0.275675) 101 +dictGet dict_array (32.7514,0.128951) 3501889 +dictGet dict_array (32.753,3.44207) 3501822 +dictGet dict_array (32.7686,2.11713) 3497126 +dictGet dict_array (32.7694,1.47159) 3497388 +dictGet dict_array (32.7768,0.0401042) 101 +dictGet dict_array (32.781,-1.34283) 101 +dictGet dict_array (32.7814,1.73876) 3497388 +dictGet dict_array (32.7856,-1.06363) 101 +dictGet dict_array (32.792699999999996,-1.1255600000000001) 101 +dictGet dict_array (32.7941,-0.645447) 101 +dictGet dict_array (32.7946,1.48889) 3497388 +dictGet dict_array (32.797,0.791753) 3501889 +dictGet dict_array (32.7982,-0.537798) 101 +dictGet dict_array (32.8091,2.3611) 3490438 +dictGet dict_array (32.81,1.7130800000000002) 3497388 +dictGet dict_array (32.8174,-0.288322) 101 +dictGet dict_array (32.823,1.6546699999999999) 3497388 +dictGet dict_array (32.8233,1.62108) 3497388 +dictGet dict_array (32.8428,-0.400045) 101 +dictGet dict_array (32.8479,2.13598) 3490438 +dictGet dict_array (32.8524,0.199902) 3501889 +dictGet dict_array (32.8543,3.23553) 3501820 +dictGet dict_array (32.8562,1.31371) 3498117 +dictGet dict_array (32.87,1.44256) 3498117 +dictGet dict_array (32.8789,2.38192) 3490438 +dictGet dict_array (32.8812,2.20734) 3497128 +dictGet dict_array (32.8815,-0.54427) 101 +dictGet dict_array (32.8853,2.4859) 3497128 +dictGet dict_array (32.8909,0.513964) 3501889 +dictGet dict_array (32.9035,2.38999) 3490438 +dictGet dict_array (32.9097,2.48131) 3497128 +dictGet dict_array (32.928,-0.943269) 101 +dictGet dict_array (32.9322,1.13165) 3498104 +dictGet dict_array (32.9348,1.22606) 3498117 +dictGet dict_array (32.9417,3.77998) 3501822 +dictGet dict_array (32.9428,3.11936) 3497167 +dictGet dict_array (32.9482,1.18092) 3498118 +dictGet dict_array (32.9506,0.0609364) 101 +dictGet dict_array (32.953,-0.828308) 101 +dictGet dict_array (32.9593,3.5209099999999998) 3501822 +dictGet dict_array (32.9617,2.07711) 3497128 +dictGet dict_array (32.966,0.693749) 3498104 +dictGet dict_array (32.9668,-0.716432) 101 +dictGet dict_array (32.9702,1.98555) 3497127 +dictGet dict_array (32.9782,1.73819) 3497388 +dictGet dict_array (32.9805,3.71151) 3501822 +dictGet dict_array (32.9821,2.97225) 3497167 +dictGet dict_array (32.995,-0.830301) 101 +dictGet dict_array (33.0234,0.770848) 3498104 +dictGet dict_array (33.0312,-0.340964) 101 +dictGet dict_array (33.0366,-0.756795) 101 +dictGet dict_array (33.0438,0.812871) 3498118 +dictGet dict_array (33.0455,1.84843) 3497127 +dictGet dict_array (33.0498,0.0913292) 101 +dictGet dict_array (33.0506,1.53739) 3497364 +dictGet dict_array (33.0554,2.4265) 3497363 +dictGet dict_array (33.0741,3.61332) 3501822 +dictGet dict_array (33.0765,-0.179985) 101 +dictGet dict_array (33.087,1.46465) 3497399 +dictGet dict_array (33.0906,-0.620383) 101 +dictGet dict_array (33.1047,-1.28027) 101 +dictGet dict_array (33.1072,1.96303) 3497127 +dictGet dict_array (33.1081,-0.897874) 101 +dictGet dict_array (33.1122,1.8950200000000001) 3497127 +dictGet dict_array (33.1237,2.63993) 3497165 +dictGet dict_array (33.1238,0.753963) 3498118 +dictGet dict_array (33.1257,0.495668) 3498102 +dictGet dict_array (33.1258,1.78341) 3497364 +dictGet dict_array (33.127,2.59646) 3497166 +dictGet dict_array (33.1324,-1.23742) 101 +dictGet dict_array (33.1359,3.83491) 101 +dictGet dict_array (33.1628,-0.379588) 101 +dictGet dict_array (33.1679,1.25601) 3498117 +dictGet dict_array (33.1688,-1.35553) 101 +dictGet dict_array (33.181,2.10943) 3497363 +dictGet dict_array (33.1871,2.81171) 3497165 +dictGet dict_array (33.1877,0.771297) 3498118 +dictGet dict_array (33.1883,-0.204797) 101 +dictGet dict_array (33.1886,3.27998) 3501820 +dictGet dict_array (33.1955,0.708907) 3498118 +dictGet dict_array (33.2044,-0.769275) 101 +dictGet dict_array (33.2182,3.36103) 3501820 +dictGet dict_array (33.2192,3.43586) 3501822 +dictGet dict_array (33.2322,-0.916753) 101 +dictGet dict_array (33.2359,-0.81321) 101 +dictGet dict_array (33.238,0.635072) 3498111 +dictGet dict_array (33.2398,3.02588) 3497165 +dictGet dict_array (33.2469,2.35698) 3497363 +dictGet dict_array (33.247,2.3327) 3497363 +dictGet dict_array (33.2579,2.8027100000000003) 3497165 +dictGet dict_array (33.2607,0.321082) 101 +dictGet dict_array (33.2653,0.243336) 101 +dictGet dict_array (33.2758,0.831836) 3498118 +dictGet dict_array (33.2771,0.886536) 3498118 +dictGet dict_array (33.2914,1.16026) 3498117 +dictGet dict_array (33.2914,1.38882) 3497399 +dictGet dict_array (33.2982,-1.16604) 101 +dictGet dict_array (33.2985,0.842556) 3498112 +dictGet dict_array (33.3005,2.8338900000000002) 3497165 +dictGet dict_array (33.305,0.0969475) 101 +dictGet dict_array (33.3072,3.82163) 101 +dictGet dict_array (33.312,3.41475) 3501820 +dictGet dict_array (33.3129,2.46048) 3497166 +dictGet dict_array (33.3134,3.46863) 3501820 +dictGet dict_array (33.3203,2.33139) 3497166 +dictGet dict_array (33.324,0.433701) 101 +dictGet dict_array (33.3338,2.44705) 3497166 +dictGet dict_array (33.337,4.06475) 101 +dictGet dict_array (33.3469,1.08172) 3498126 +dictGet dict_array (33.3538,0.717896) 3498112 +dictGet dict_array (33.3618,1.37899) 3497399 +dictGet dict_array (33.3698,0.547744) 3501862 +dictGet dict_array (33.3705,0.957619) 3498112 +dictGet dict_array (33.3821,3.07258) 3497165 +dictGet dict_array (33.3881,3.0626) 3497165 +dictGet dict_array (33.393,-0.816186) 101 +dictGet dict_array (33.3945,0.869508) 3498110 +dictGet dict_array (33.4001,1.24186) 3498117 +dictGet dict_array (33.4008,2.34911) 3497166 +dictGet dict_array (33.4166,-1.2808899999999999) 101 +dictGet dict_array (33.4167,3.0655) 3497165 +dictGet dict_array (33.4204,2.81887) 3497165 +dictGet dict_array (33.4211,1.71128) 3497400 +dictGet dict_array (33.4237,2.91761) 3497165 +dictGet dict_array (33.4266,1.5955599999999999) 3497399 +dictGet dict_array (33.4353,-0.391392) 101 +dictGet dict_array (33.4362,-0.134658) 101 +dictGet dict_array (33.4386,0.15396) 101 +dictGet dict_array (33.4421,-0.50712) 101 +dictGet dict_array (33.452,0.915829) 3498126 +dictGet dict_array (33.463,-0.0882717) 101 +dictGet dict_array (33.464,-1.00949) 101 +dictGet dict_array (33.4692,0.954092) 3498126 +dictGet dict_array (33.4716,1.9538799999999998) 3497400 +dictGet dict_array (33.4756,1.85836) 3497400 +dictGet dict_array (33.4859,4.0751) 101 +dictGet dict_array (33.4899,3.54193) 3501820 +dictGet dict_array (33.4935,3.49794) 3501820 +dictGet dict_array (33.494,-0.983356) 101 +dictGet dict_array (33.4955,-1.28128) 101 +dictGet dict_array (33.4965,-0.278687) 101 +dictGet dict_array (33.4991,0.647491) 3498110 +dictGet dict_array (33.5076,2.2272) 3497424 +dictGet dict_array (33.5079,-0.498199) 101 +dictGet dict_array (33.5157,0.535034) 3501862 +dictGet dict_array (33.5171,2.49677) 3497166 +dictGet dict_array (33.5255,2.4447200000000002) 3497166 +dictGet dict_array (33.526,4.01194) 101 +dictGet dict_array (33.5288,0.789434) 3498110 +dictGet dict_array (33.5356,-1.17671) 101 +dictGet dict_array (33.5402,1.49152) 3497399 +dictGet dict_array (33.5418,3.45757) 3501820 +dictGet dict_array (33.5428,1.90712) 3497400 +dictGet dict_array (33.5556,-0.55741) 101 +dictGet dict_array (33.5564,0.876858) 3498128 +dictGet dict_array (33.5567,-0.10208) 101 +dictGet dict_array (33.5645,-0.124824) 101 +dictGet dict_array (33.5663,3.4872) 3501820 +dictGet dict_array (33.5716,-0.0107611) 101 +dictGet dict_array (33.578,3.55714) 3501820 +dictGet dict_array (33.5826,-0.49076) 101 +dictGet dict_array (33.5909,0.773737) 3498110 +dictGet dict_array (33.5958,2.9619999999999997) 3497425 +dictGet dict_array (33.6193,-0.919755) 101 +dictGet dict_array (33.6313,0.652132) 3498110 +dictGet dict_array (33.632,0.823351) 3498128 +dictGet dict_array (33.66,2.18998) 3497424 +dictGet dict_array (33.6621,0.535395) 3498135 +dictGet dict_array (33.6726,3.19367) 3497438 +dictGet dict_array (33.6912,1.74522) 3497400 +dictGet dict_array (33.705,0.706397) 3498135 +dictGet dict_array (33.7076,0.7622) 3498128 +dictGet dict_array (33.7112,1.70187) 3497400 +dictGet dict_array (33.7246,-1.14837) 101 +dictGet dict_array (33.7326,2.62413) 3497425 +dictGet dict_array (33.7332,2.82137) 3497425 +dictGet dict_array (33.7434,0.394672) 3498135 +dictGet dict_array (33.7443,1.54557) 3497398 +dictGet dict_array (33.7506,1.57317) 3497398 +dictGet dict_array (33.7526,1.8578999999999999) 3497424 +dictGet dict_array (33.766,4.15013) 101 +dictGet dict_array (33.7834,2.41789) 3497439 +dictGet dict_array (33.7864,0.230935) 101 +dictGet dict_array (33.7965,3.05709) 3497438 +dictGet dict_array (33.7998,3.32881) 3497438 +dictGet dict_array (33.8003,2.97338) 3497425 +dictGet dict_array (33.8007,-1.08962) 101 +dictGet dict_array (33.8022,-0.139488) 101 +dictGet dict_array (33.8065,2.70857) 3497425 +dictGet dict_array (33.8169,-0.607788) 101 +dictGet dict_array (33.8203,0.108512) 3501863 +dictGet dict_array (33.8231,-1.03449) 101 +dictGet dict_array (33.8312,3.49458) 3501829 +dictGet dict_array (33.8342,0.297518) 3501863 +dictGet dict_array (33.8352,0.165872) 101 +dictGet dict_array (33.8354,1.87277) 3497424 +dictGet dict_array (33.8371,1.60103) 3497398 +dictGet dict_array (33.8387,1.9968) 3497424 +dictGet dict_array (33.8403,3.5805) 3501829 +dictGet dict_array (33.8414,-0.703067) 101 +dictGet dict_array (33.844,-0.179472) 101 +dictGet dict_array (33.8468,3.40137) 3501829 +dictGet dict_array (33.8509,4.15334) 101 +dictGet dict_array (33.8539,2.38339) 3497439 +dictGet dict_array (33.858,-1.3122500000000001) 101 +dictGet dict_array (33.859,3.72626) 3501829 +dictGet dict_array (33.8616,2.24433) 3497424 +dictGet dict_array (33.8621,3.01035) 3497438 +dictGet dict_array (33.8623,1.17559) 3498129 +dictGet dict_array (33.8682,2.706) 3497425 +dictGet dict_array (33.8684,0.189231) 3501863 +dictGet dict_array (33.872,1.93574) 3497424 +dictGet dict_array (33.8844,3.80404) 3501829 +dictGet dict_array (33.8888,0.594884) 3498135 +dictGet dict_array (33.8946,2.74161) 3497438 +dictGet dict_array (33.9023,0.6239) 3498135 +dictGet dict_array (33.9057,0.873222) 3498136 +dictGet dict_array (33.9157,-1.26607) 101 +dictGet dict_array (33.92,2.06848) 3497397 +dictGet dict_array (33.9298,-0.00526229) 101 +dictGet dict_array (33.932,3.07063) 3497438 +dictGet dict_array (33.9322,0.629385) 3501864 +dictGet dict_array (33.9367,-1.41955) 101 +dictGet dict_array (33.937,1.42532) 3498173 +dictGet dict_array (33.9375,1.1467100000000001) 3498159 +dictGet dict_array (33.9434,-1.05739) 101 +dictGet dict_array (33.9477,3.34809) 3501829 +dictGet dict_array (33.95,2.21715) 3497397 +dictGet dict_array (33.955799999999996,0.305176) 3501859 +dictGet dict_array (33.9686,-0.28273) 101 +dictGet dict_array (33.9703,4.1255) 3501829 +dictGet dict_array (33.9707,3.08199) 3497438 +dictGet dict_array (33.9754,1.06203) 3498159 +dictGet dict_array (33.9757,3.72468) 3501829 +dictGet dict_array (33.9775,-0.0440599) 101 +dictGet dict_array (33.9777,-0.251484) 101 +dictGet dict_array (33.9789,-0.339374) 101 +dictGet dict_array (33.9849,2.54515) 3497425 +dictGet dict_array (33.9885,-0.318557) 101 +dictGet dict_array (33.9977,1.07175) 3498159 +dictGet dict_array (33.9984,-0.700517) 101 +dictGet dict_array (34.0149,3.53338) 3501829 +dictGet dict_array (34.0173,3.39155) 3501829 +dictGet dict_array (34.0317,3.9579) 3501829 +dictGet dict_array (34.0369,3.83612) 3501829 +dictGet dict_array (34.043,-0.0887221) 101 +dictGet dict_array (34.0487,1.14252) 3498159 +dictGet dict_array (34.052,1.74832) 3497397 +dictGet dict_array (34.0711,-0.898071) 101 +dictGet dict_array (34.0747,1.55057) 3498173 +dictGet dict_array (34.0803,3.16763) 3497438 +dictGet dict_array (34.0872,3.75555) 3501829 +dictGet dict_array (34.0965,1.62038) 3498173 +dictGet dict_array (34.0977,-0.412691) 101 +dictGet dict_array (34.0986,0.0294206) 101 +dictGet dict_array (34.1072,3.15823) 3497438 +dictGet dict_array (34.1092,3.09599) 3497438 +dictGet dict_array (34.1206,1.04637) 3498160 +dictGet dict_array (34.1209,3.13826) 3497438 +dictGet dict_array (34.1265,3.95881) 3501829 +dictGet dict_array (34.1286,-0.539319) 101 +dictGet dict_array (34.1358,3.67451) 3501829 +dictGet dict_array (34.1428,0.136115) 101 +dictGet dict_array (34.157,1.73522) 3497397 +dictGet dict_array (34.1581,1.48001) 3498172 +dictGet dict_array (34.1682,3.42373) 3501829 +dictGet dict_array (34.1683,-1.26511) 101 +dictGet dict_array (34.1684,4.20007) 101 +dictGet dict_array (34.1854,3.32089) 3501829 +dictGet dict_array (34.2022,0.749536) 3501864 +dictGet dict_array (34.2044,3.04865) 3497438 +dictGet dict_array (34.22,-0.500055) 101 +dictGet dict_array (34.2249,0.743775) 3501864 +dictGet dict_array (34.2254,1.34702) 3498172 +dictGet dict_array (34.2355,-0.898843) 101 +dictGet dict_array (34.2394,2.0203699999999998) 3497439 +dictGet dict_array (34.2466,1.83785) 3498251 +dictGet dict_array (34.247,4.09563) 101 +dictGet dict_array (34.2508,2.61312) 3497439 +dictGet dict_array (34.2517,1.69642) 3498251 +dictGet dict_array (34.2564,4.13033) 101 +dictGet dict_array (34.2574,4.18928) 101 +dictGet dict_array (34.2614,-0.478719) 101 +dictGet dict_array (34.2625,2.38088) 3497439 +dictGet dict_array (34.2666,3.1503) 3501829 +dictGet dict_array (34.271,4.02223) 101 +dictGet dict_array (34.2727,0.514755) 101 +dictGet dict_array (34.278,1.98929) 3497439 +dictGet dict_array (34.2798,-0.199208) 101 +dictGet dict_array (34.2804,2.05184) 3497439 +dictGet dict_array (34.2945,-1.11051) 101 +dictGet dict_array (34.3168,-0.0829721) 101 +dictGet dict_array (34.3345,3.4358) 3501829 +dictGet dict_array (34.3377,1.13527) 3498162 +dictGet dict_array (34.3383,1.27891) 3498161 +dictGet dict_array (34.3391,1.47945) 3498161 +dictGet dict_array (34.3441,0.627014) 101 +dictGet dict_array (34.347,2.4853) 3497439 +dictGet dict_array (34.3514,2.16247) 3497439 +dictGet dict_array (34.3627,2.64533) 3497439 +dictGet dict_array (34.3682,-0.227501) 101 +dictGet dict_array (34.3756,4.21248) 101 +dictGet dict_array (34.379,3.96604) 101 +dictGet dict_array (34.3827,1.7518) 3498251 +dictGet dict_array (34.3912,2.8834) 3501830 +dictGet dict_array (34.3919,0.668829) 101 +dictGet dict_array (34.3949,2.00338) 3497439 +dictGet dict_array (34.3987,0.557268) 101 +dictGet dict_array (34.4111,0.768558) 101 +dictGet dict_array (34.4119,2.8742) 3501830 +dictGet dict_array (34.416,3.50841) 3501829 +dictGet dict_array (34.4212,1.24916) 3498161 +dictGet dict_array (34.4251,0.457029) 101 +dictGet dict_array (34.4274,-0.902559) 101 +dictGet dict_array (34.4325,4.03159) 101 +dictGet dict_array (34.438,1.63994) 3498251 +dictGet dict_array (34.4403,-0.177594) 101 +dictGet dict_array (34.4421,0.726712) 101 +dictGet dict_array (34.4517,2.98611) 3501830 +dictGet dict_array (34.4658,-1.312) 101 +dictGet dict_array (34.4732,-0.0681338) 101 +dictGet dict_array (34.4752,2.81646) 3501830 +dictGet dict_array (34.4914,2.3858) 3497439 +dictGet dict_array (34.4923,0.855231) 101 +dictGet dict_array (34.5235,1.78468) 3498251 +dictGet dict_array (34.5305,4.10608) 101 +dictGet dict_array (34.5389,0.621937) 101 +dictGet dict_array (34.5406,3.17145) 101 +dictGet dict_array (34.5434,-0.56306) 101 +dictGet dict_array (34.5449,3.13311) 3501829 +dictGet dict_array (34.5491,2.31572) 3497439 +dictGet dict_array (34.5539,2.94028) 3501830 +dictGet dict_array (34.5546,-0.208825) 101 +dictGet dict_array (34.5549,3.78486) 101 +dictGet dict_array (34.5676,0.307148) 101 +dictGet dict_array (34.5743,1.5217399999999999) 3501838 +dictGet dict_array (34.5775,3.48046) 101 +dictGet dict_array (34.5815,2.5243700000000002) 3501830 +dictGet dict_array (34.5841,4.21191) 101 +dictGet dict_array (34.5887,2.65083) 3501830 +dictGet dict_array (34.5937,3.2143) 101 +dictGet dict_array (34.6013,-1.0612) 101 +dictGet dict_array (34.6089,1.36066) 3501838 +dictGet dict_array (34.6103,3.40227) 101 +dictGet dict_array (34.6128,1.92276) 3498251 +dictGet dict_array (34.6175,2.43627) 3498251 +dictGet dict_array (34.6209,3.43776) 101 +dictGet dict_array (34.6234,2.60237) 3501830 +dictGet dict_array (34.6275,3.52479) 101 +dictGet dict_array (34.635,0.568558) 101 +dictGet dict_array (34.6373,2.37692) 3498251 +dictGet dict_array (34.6375,3.52234) 101 +dictGet dict_array (34.6426,2.12397) 3498251 +dictGet dict_array (34.6513,2.80915) 3501830 +dictGet dict_array (34.6632,2.30039) 3498251 +dictGet dict_array (34.6691,1.86582) 3498251 +dictGet dict_array (34.6739,0.15342) 101 +dictGet dict_array (34.6825,0.0499679) 101 +dictGet dict_array (34.6893,0.454326) 101 +dictGet dict_array (34.6957,-0.358598) 101 +dictGet dict_array (34.6986,0.562679) 101 +dictGet dict_array (34.712,1.12114) 101 +dictGet dict_array (34.7126,-0.0057301) 101 +dictGet dict_array (34.7137,0.0248501) 101 +dictGet dict_array (34.7162,1.15623) 101 +dictGet dict_array (34.7258,3.95142) 101 +dictGet dict_array (34.7347,3.5232099999999997) 101 +dictGet dict_array (34.7363,2.23374) 3501830 +dictGet dict_array (34.7375,0.397841) 101 +dictGet dict_array (34.7423,3.09198) 101 +dictGet dict_array (34.7452,3.09029) 101 +dictGet dict_array (34.7539,-1.06943) 101 +dictGet dict_array (34.7733,-0.00912717) 101 +dictGet dict_array (34.774,2.71088) 3501830 +dictGet dict_array (34.7771,1.46009) 3501835 +dictGet dict_array (34.7782,-1.28308) 101 +dictGet dict_array (34.7924,3.63564) 101 +dictGet dict_array (34.7939,-0.416676) 101 +dictGet dict_array (34.7964,-0.401773) 101 +dictGet dict_array (34.7974,0.0286873) 101 +dictGet dict_array (34.7975,3.05965) 101 +dictGet dict_array (34.8037,3.07263) 101 +dictGet dict_array (34.8254,-0.390284) 101 +dictGet dict_array (34.828,1.91869) 3498251 +dictGet dict_array (34.8289,3.71058) 101 +dictGet dict_array (34.8403,2.14606) 3501835 +dictGet dict_array (34.8437,2.20617) 3501830 +dictGet dict_array (34.8469,2.38435) 3501830 +dictGet dict_array (34.86,1.45705) 101 +dictGet dict_array (34.8612,0.914248) 101 +dictGet dict_array (34.8663,3.4215400000000002) 101 +dictGet dict_array (34.8724,-0.375144) 101 +dictGet dict_array (34.8795,3.29317) 101 +dictGet dict_array (34.8823,1.21988) 101 +dictGet dict_array (34.8834,1.07657) 101 +dictGet dict_array (34.8837,0.157648) 101 +dictGet dict_array (34.8871,-0.9755) 101 +dictGet dict_array (34.8871,1.8943699999999999) 3501835 +dictGet dict_array (34.889,3.36756) 101 +dictGet dict_array (34.8907,1.24874) 101 +dictGet dict_array (34.8965,3.13508) 101 +dictGet dict_array (34.9042,2.62092) 101 +dictGet dict_array (34.9055,-0.0448967) 101 +dictGet dict_array (34.9122,0.110576) 101 +dictGet dict_array (34.9228,3.60183) 101 +dictGet dict_array (34.9237,1.21715) 101 +dictGet dict_array (34.9296,1.70459) 3501835 +dictGet dict_array (34.941,-1.14663) 101 +dictGet dict_array (34.9448,1.18923) 101 +dictGet dict_array (34.9462,3.81678) 101 +dictGet dict_array (34.9466,0.593463) 101 +dictGet dict_array (34.9485,0.150307) 101 +dictGet dict_array (34.9542,0.487238) 101 +dictGet dict_array (34.9559,2.03473) 3501835 +dictGet dict_array (34.9671,-0.960225) 101 +dictGet dict_array (34.9711,2.63444) 101 +dictGet dict_array (34.9892,0.354775) 101 +dictGet dict_array (34.9907,1.40724) 101 +dictGet dict_array (34.9916,-0.00173097) 101 +dictGet dict_array (34.9919,2.06167) 101 diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh index f6880ae5009..47f7a5c1c4f 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh @@ -1,31 +1,31 @@ #!/usr/bin/env bash -# Tags: no-debug, no-parallel +# Tags: no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -TMP_DIR="/tmp" +TMP_DIR=${CLICKHOUSE_TMP}/tmp +DATA_DIR=${CLICKHOUSE_TMP}/data +mkdir -p $TMP_DIR +mkdir -p $DATA_DIR declare -a SearchTypes=("POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") -tar -xf "${CURDIR}"/01037_test_data_perf.tar.gz -C "${CURDIR}" +tar -xf "${CURDIR}"/01037_test_data_perf.tar.gz -C "${DATA_DIR}" $CLICKHOUSE_CLIENT -n --query=" -DROP DATABASE IF EXISTS test_01037; -CREATE DATABASE test_01037; -DROP TABLE IF EXISTS test_01037.points; -CREATE TABLE test_01037.points (x Float64, y Float64) ENGINE = Memory; +CREATE TABLE points (x Float64, y Float64) ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.points FORMAT TSV" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_point_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${DATA_DIR}/01037_point_data" -rm "${CURDIR}"/01037_point_data +rm "${DATA_DIR}"/01037_point_data $CLICKHOUSE_CLIENT -n --query=" -DROP TABLE IF EXISTS test_01037.polygons_array; +DROP TABLE IF EXISTS polygons_array; -CREATE TABLE test_01037.polygons_array +CREATE TABLE polygons_array ( key Array(Array(Array(Array(Float64)))), name String, @@ -34,36 +34,32 @@ CREATE TABLE test_01037.polygons_array ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${DATA_DIR}/01037_polygon_data" -rm "${CURDIR}"/01037_polygon_data +rm "${DATA_DIR}"/01037_polygon_data for type in "${SearchTypes[@]}"; do outputFile="${TMP_DIR}/results${type}.out" $CLICKHOUSE_CLIENT -n --query=" - DROP DICTIONARY IF EXISTS test_01037.dict_array; + DROP DICTIONARY IF EXISTS dict_array; - CREATE DICTIONARY test_01037.dict_array + CREATE DICTIONARY dict_array ( key Array(Array(Array(Array(Float64)))), name String DEFAULT 'qqq', value UInt64 DEFAULT 101 ) PRIMARY KEY key - SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037')) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB currentDatabase())) LIFETIME(0) LAYOUT($type()); - select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, - dictGet(dict_name, 'value', key) from test_01037.points order by x, y; + select 'dictGet', 'dict_array' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'value', key) from points order by x, y; " > "$outputFile" diff -q "${CURDIR}/01037_polygon_dicts_correctness_fast.ans" "$outputFile" done -$CLICKHOUSE_CLIENT -n --query=" -DROP TABLE test_01037.points; -DROP DATABASE test_01037; -" diff --git a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.ans b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.ans index dfad14fb113..937539643ec 100644 --- a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.ans +++ b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.ans @@ -1,104 +1,104 @@ -dictGet test_01037.dict_array (-100,-42) qqq 101 -dictGet test_01037.dict_array (-1,0) Click South 423 -dictGet test_01037.dict_array (-0.1,0) Click South 423 -dictGet test_01037.dict_array (0,-2) Click West 424 -dictGet test_01037.dict_array (0,-1.1) Click West 424 -dictGet test_01037.dict_array (0,1.1) Click North 422 -dictGet test_01037.dict_array (0,2) Click North 422 -dictGet test_01037.dict_array (0.1,0) Click East 421 -dictGet test_01037.dict_array (0.99,2.99) Click North 422 -dictGet test_01037.dict_array (1,0) Click East 421 -dictGet test_01037.dict_array (3,3) House 314159 -dictGet test_01037.dict_array (5,6) Click 42 -dictGet test_01037.dict_array (7.01,7.01) qqq 101 -dictGetOrDefault test_01037.dict_array (-100,-42) www 1234 -dictGetOrDefault test_01037.dict_array (-1,0) Click South 423 -dictGetOrDefault test_01037.dict_array (-0.1,0) Click South 423 -dictGetOrDefault test_01037.dict_array (0,-2) Click West 424 -dictGetOrDefault test_01037.dict_array (0,-1.1) Click West 424 -dictGetOrDefault test_01037.dict_array (0,1.1) Click North 422 -dictGetOrDefault test_01037.dict_array (0,2) Click North 422 -dictGetOrDefault test_01037.dict_array (0.1,0) Click East 421 -dictGetOrDefault test_01037.dict_array (0.99,2.99) Click North 422 -dictGetOrDefault test_01037.dict_array (1,0) Click East 421 -dictGetOrDefault test_01037.dict_array (3,3) House 314159 -dictGetOrDefault test_01037.dict_array (5,6) Click 42 -dictGetOrDefault test_01037.dict_array (7.01,7.01) www 1234 -dictGetOrDefault test_01037.dict_array (-100,-42) dd 44 -dictGetOrDefault test_01037.dict_array (-1,0) Click South 423 -dictGetOrDefault test_01037.dict_array (-0.1,0) Click South 423 -dictGetOrDefault test_01037.dict_array (0,-2) Click West 424 -dictGetOrDefault test_01037.dict_array (0,-1.1) Click West 424 -dictGetOrDefault test_01037.dict_array (0,1.1) Click North 422 -dictGetOrDefault test_01037.dict_array (0,2) Click North 422 -dictGetOrDefault test_01037.dict_array (0.1,0) Click East 421 -dictGetOrDefault test_01037.dict_array (0.99,2.99) Click North 422 -dictGetOrDefault test_01037.dict_array (1,0) Click East 421 -dictGetOrDefault test_01037.dict_array (3,3) House 314159 -dictGetOrDefault test_01037.dict_array (5,6) Click 42 -dictGetOrDefault test_01037.dict_array (7.01,7.01) ee 55 -dictGet test_01037.dict_tuple (-100,-42) qqq 101 -dictGet test_01037.dict_tuple (-1,0) Click South 423 -dictGet test_01037.dict_tuple (-0.1,0) Click South 423 -dictGet test_01037.dict_tuple (0,-2) Click West 424 -dictGet test_01037.dict_tuple (0,-1.1) Click West 424 -dictGet test_01037.dict_tuple (0,1.1) Click North 422 -dictGet test_01037.dict_tuple (0,2) Click North 422 -dictGet test_01037.dict_tuple (0.1,0) Click East 421 -dictGet test_01037.dict_tuple (0.99,2.99) Click North 422 -dictGet test_01037.dict_tuple (1,0) Click East 421 -dictGet test_01037.dict_tuple (3,3) House 314159 -dictGet test_01037.dict_tuple (5,6) Click 42 -dictGet test_01037.dict_tuple (7.01,7.01) qqq 101 -dictGetOrDefault test_01037.dict_tuple (-100,-42) www 1234 -dictGetOrDefault test_01037.dict_tuple (-1,0) Click South 423 -dictGetOrDefault test_01037.dict_tuple (-0.1,0) Click South 423 -dictGetOrDefault test_01037.dict_tuple (0,-2) Click West 424 -dictGetOrDefault test_01037.dict_tuple (0,-1.1) Click West 424 -dictGetOrDefault test_01037.dict_tuple (0,1.1) Click North 422 -dictGetOrDefault test_01037.dict_tuple (0,2) Click North 422 -dictGetOrDefault test_01037.dict_tuple (0.1,0) Click East 421 -dictGetOrDefault test_01037.dict_tuple (0.99,2.99) Click North 422 -dictGetOrDefault test_01037.dict_tuple (1,0) Click East 421 -dictGetOrDefault test_01037.dict_tuple (3,3) House 314159 -dictGetOrDefault test_01037.dict_tuple (5,6) Click 42 -dictGetOrDefault test_01037.dict_tuple (7.01,7.01) www 1234 -dictGetOrDefault test_01037.dict_tuple (-100,-42) dd 44 -dictGetOrDefault test_01037.dict_tuple (-1,0) Click South 423 -dictGetOrDefault test_01037.dict_tuple (-0.1,0) Click South 423 -dictGetOrDefault test_01037.dict_tuple (0,-2) Click West 424 -dictGetOrDefault test_01037.dict_tuple (0,-1.1) Click West 424 -dictGetOrDefault test_01037.dict_tuple (0,1.1) Click North 422 -dictGetOrDefault test_01037.dict_tuple (0,2) Click North 422 -dictGetOrDefault test_01037.dict_tuple (0.1,0) Click East 421 -dictGetOrDefault test_01037.dict_tuple (0.99,2.99) Click North 422 -dictGetOrDefault test_01037.dict_tuple (1,0) Click East 421 -dictGetOrDefault test_01037.dict_tuple (3,3) House 314159 -dictGetOrDefault test_01037.dict_tuple (5,6) Click 42 -dictGetOrDefault test_01037.dict_tuple (7.01,7.01) ee 55 -dictHas test_01037.dict_array (-100,-42) 0 -dictHas test_01037.dict_array (-1,0) 1 -dictHas test_01037.dict_array (-0.1,0) 1 -dictHas test_01037.dict_array (0,-2) 1 -dictHas test_01037.dict_array (0,-1.1) 1 -dictHas test_01037.dict_array (0,1.1) 1 -dictHas test_01037.dict_array (0,2) 1 -dictHas test_01037.dict_array (0.1,0) 1 -dictHas test_01037.dict_array (0.99,2.99) 1 -dictHas test_01037.dict_array (1,0) 1 -dictHas test_01037.dict_array (3,3) 1 -dictHas test_01037.dict_array (5,6) 1 -dictHas test_01037.dict_array (7.01,7.01) 0 -dictHas test_01037.dict_tuple (-100,-42) 0 -dictHas test_01037.dict_tuple (-1,0) 1 -dictHas test_01037.dict_tuple (-0.1,0) 1 -dictHas test_01037.dict_tuple (0,-2) 1 -dictHas test_01037.dict_tuple (0,-1.1) 1 -dictHas test_01037.dict_tuple (0,1.1) 1 -dictHas test_01037.dict_tuple (0,2) 1 -dictHas test_01037.dict_tuple (0.1,0) 1 -dictHas test_01037.dict_tuple (0.99,2.99) 1 -dictHas test_01037.dict_tuple (1,0) 1 -dictHas test_01037.dict_tuple (3,3) 1 -dictHas test_01037.dict_tuple (5,6) 1 -dictHas test_01037.dict_tuple (7.01,7.01) 0 +dictGet dict_array (-100,-42) qqq 101 +dictGet dict_array (-1,0) Click South 423 +dictGet dict_array (-0.1,0) Click South 423 +dictGet dict_array (0,-2) Click West 424 +dictGet dict_array (0,-1.1) Click West 424 +dictGet dict_array (0,1.1) Click North 422 +dictGet dict_array (0,2) Click North 422 +dictGet dict_array (0.1,0) Click East 421 +dictGet dict_array (0.99,2.99) Click North 422 +dictGet dict_array (1,0) Click East 421 +dictGet dict_array (3,3) House 314159 +dictGet dict_array (5,6) Click 42 +dictGet dict_array (7.01,7.01) qqq 101 +dictGetOrDefault dict_array (-100,-42) www 1234 +dictGetOrDefault dict_array (-1,0) Click South 423 +dictGetOrDefault dict_array (-0.1,0) Click South 423 +dictGetOrDefault dict_array (0,-2) Click West 424 +dictGetOrDefault dict_array (0,-1.1) Click West 424 +dictGetOrDefault dict_array (0,1.1) Click North 422 +dictGetOrDefault dict_array (0,2) Click North 422 +dictGetOrDefault dict_array (0.1,0) Click East 421 +dictGetOrDefault dict_array (0.99,2.99) Click North 422 +dictGetOrDefault dict_array (1,0) Click East 421 +dictGetOrDefault dict_array (3,3) House 314159 +dictGetOrDefault dict_array (5,6) Click 42 +dictGetOrDefault dict_array (7.01,7.01) www 1234 +dictGetOrDefault dict_array (-100,-42) dd 44 +dictGetOrDefault dict_array (-1,0) Click South 423 +dictGetOrDefault dict_array (-0.1,0) Click South 423 +dictGetOrDefault dict_array (0,-2) Click West 424 +dictGetOrDefault dict_array (0,-1.1) Click West 424 +dictGetOrDefault dict_array (0,1.1) Click North 422 +dictGetOrDefault dict_array (0,2) Click North 422 +dictGetOrDefault dict_array (0.1,0) Click East 421 +dictGetOrDefault dict_array (0.99,2.99) Click North 422 +dictGetOrDefault dict_array (1,0) Click East 421 +dictGetOrDefault dict_array (3,3) House 314159 +dictGetOrDefault dict_array (5,6) Click 42 +dictGetOrDefault dict_array (7.01,7.01) ee 55 +dictGet dict_tuple (-100,-42) qqq 101 +dictGet dict_tuple (-1,0) Click South 423 +dictGet dict_tuple (-0.1,0) Click South 423 +dictGet dict_tuple (0,-2) Click West 424 +dictGet dict_tuple (0,-1.1) Click West 424 +dictGet dict_tuple (0,1.1) Click North 422 +dictGet dict_tuple (0,2) Click North 422 +dictGet dict_tuple (0.1,0) Click East 421 +dictGet dict_tuple (0.99,2.99) Click North 422 +dictGet dict_tuple (1,0) Click East 421 +dictGet dict_tuple (3,3) House 314159 +dictGet dict_tuple (5,6) Click 42 +dictGet dict_tuple (7.01,7.01) qqq 101 +dictGetOrDefault dict_tuple (-100,-42) www 1234 +dictGetOrDefault dict_tuple (-1,0) Click South 423 +dictGetOrDefault dict_tuple (-0.1,0) Click South 423 +dictGetOrDefault dict_tuple (0,-2) Click West 424 +dictGetOrDefault dict_tuple (0,-1.1) Click West 424 +dictGetOrDefault dict_tuple (0,1.1) Click North 422 +dictGetOrDefault dict_tuple (0,2) Click North 422 +dictGetOrDefault dict_tuple (0.1,0) Click East 421 +dictGetOrDefault dict_tuple (0.99,2.99) Click North 422 +dictGetOrDefault dict_tuple (1,0) Click East 421 +dictGetOrDefault dict_tuple (3,3) House 314159 +dictGetOrDefault dict_tuple (5,6) Click 42 +dictGetOrDefault dict_tuple (7.01,7.01) www 1234 +dictGetOrDefault dict_tuple (-100,-42) dd 44 +dictGetOrDefault dict_tuple (-1,0) Click South 423 +dictGetOrDefault dict_tuple (-0.1,0) Click South 423 +dictGetOrDefault dict_tuple (0,-2) Click West 424 +dictGetOrDefault dict_tuple (0,-1.1) Click West 424 +dictGetOrDefault dict_tuple (0,1.1) Click North 422 +dictGetOrDefault dict_tuple (0,2) Click North 422 +dictGetOrDefault dict_tuple (0.1,0) Click East 421 +dictGetOrDefault dict_tuple (0.99,2.99) Click North 422 +dictGetOrDefault dict_tuple (1,0) Click East 421 +dictGetOrDefault dict_tuple (3,3) House 314159 +dictGetOrDefault dict_tuple (5,6) Click 42 +dictGetOrDefault dict_tuple (7.01,7.01) ee 55 +dictHas dict_array (-100,-42) 0 +dictHas dict_array (-1,0) 1 +dictHas dict_array (-0.1,0) 1 +dictHas dict_array (0,-2) 1 +dictHas dict_array (0,-1.1) 1 +dictHas dict_array (0,1.1) 1 +dictHas dict_array (0,2) 1 +dictHas dict_array (0.1,0) 1 +dictHas dict_array (0.99,2.99) 1 +dictHas dict_array (1,0) 1 +dictHas dict_array (3,3) 1 +dictHas dict_array (5,6) 1 +dictHas dict_array (7.01,7.01) 0 +dictHas dict_tuple (-100,-42) 0 +dictHas dict_tuple (-1,0) 1 +dictHas dict_tuple (-0.1,0) 1 +dictHas dict_tuple (0,-2) 1 +dictHas dict_tuple (0,-1.1) 1 +dictHas dict_tuple (0,1.1) 1 +dictHas dict_tuple (0,2) 1 +dictHas dict_tuple (0.1,0) 1 +dictHas dict_tuple (0.99,2.99) 1 +dictHas dict_tuple (1,0) 1 +dictHas dict_tuple (3,3) 1 +dictHas dict_tuple (5,6) 1 +dictHas dict_tuple (7.01,7.01) 0 diff --git a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh index be983ec1be4..d1ee3f283bc 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh @@ -1,56 +1,52 @@ #!/usr/bin/env bash -# Tags: no-debug, no-parallel +# Tags: no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -TMP_DIR="/tmp" +TMP_DIR=${CLICKHOUSE_TMP}/tmp +mkdir -p $TMP_DIR $CLICKHOUSE_CLIENT -n --query=" -DROP DATABASE IF EXISTS test_01037; +DROP TABLE IF EXISTS polygons_array; -CREATE DATABASE test_01037; +CREATE TABLE polygons_array (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ENGINE = Memory; +INSERT INTO polygons_array VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); +INSERT INTO polygons_array VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); +INSERT INTO polygons_array VALUES ([[[[3, 1], [0, 1], [0, -1], [3, -1]]]], 'Click East', 421); +INSERT INTO polygons_array VALUES ([[[[-1, 1], [1, 1], [1, 3], [-1, 3]]]], 'Click North', 422); +INSERT INTO polygons_array VALUES ([[[[-3, 1], [-3, -1], [0, -1], [0, 1]]]], 'Click South', 423); +INSERT INTO polygons_array VALUES ([[[[-1, -1], [1, -1], [1, -3], [-1, -3]]]], 'Click West', 424); -DROP TABLE IF EXISTS test_01037.polygons_array; +DROP TABLE IF EXISTS polygons_tuple; -CREATE TABLE test_01037.polygons_array (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ENGINE = Memory; -INSERT INTO test_01037.polygons_array VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); -INSERT INTO test_01037.polygons_array VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); -INSERT INTO test_01037.polygons_array VALUES ([[[[3, 1], [0, 1], [0, -1], [3, -1]]]], 'Click East', 421); -INSERT INTO test_01037.polygons_array VALUES ([[[[-1, 1], [1, 1], [1, 3], [-1, 3]]]], 'Click North', 422); -INSERT INTO test_01037.polygons_array VALUES ([[[[-3, 1], [-3, -1], [0, -1], [0, 1]]]], 'Click South', 423); -INSERT INTO test_01037.polygons_array VALUES ([[[[-1, -1], [1, -1], [1, -3], [-1, -3]]]], 'Click West', 424); +CREATE TABLE polygons_tuple (key Array(Array(Array(Tuple(Float64, Float64)))), name String, value UInt64) ENGINE = Memory; +INSERT INTO polygons_tuple VALUES ([[[(1, 3), (1, 1), (3, 1), (3, -1), (1, -1), (1, -3), (-1, -3), (-1, -1), (-3, -1), (-3, 1), (-1, 1), (-1, 3)]], [[(5, 5), (5, 1), (7, 1), (7, 7), (1, 7), (1, 5)]]], 'Click', 42); +INSERT INTO polygons_tuple VALUES ([[[(5, 5), (5, -5), (-5, -5), (-5, 5)], [(1, 3), (1, 1), (3, 1), (3, -1), (1, -1), (1, -3), (-1, -3), (-1, -1), (-3, -1), (-3, 1), (-1, 1), (-1, 3)]]], 'House', 314159); +INSERT INTO polygons_tuple VALUES ([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'Click East', 421); +INSERT INTO polygons_tuple VALUES ([[[(-1, 1), (1, 1), (1, 3), (-1, 3)]]], 'Click North', 422); +INSERT INTO polygons_tuple VALUES ([[[(-3, 1), (-3, -1), (0, -1), (0, 1)]]], 'Click South', 423); +INSERT INTO polygons_tuple VALUES ([[[(-1, -1), (1, -1), (1, -3), (-1, -3)]]], 'Click West', 424); -DROP TABLE IF EXISTS test_01037.polygons_tuple; +DROP TABLE IF EXISTS points; -CREATE TABLE test_01037.polygons_tuple (key Array(Array(Array(Tuple(Float64, Float64)))), name String, value UInt64) ENGINE = Memory; -INSERT INTO test_01037.polygons_tuple VALUES ([[[(1, 3), (1, 1), (3, 1), (3, -1), (1, -1), (1, -3), (-1, -3), (-1, -1), (-3, -1), (-3, 1), (-1, 1), (-1, 3)]], [[(5, 5), (5, 1), (7, 1), (7, 7), (1, 7), (1, 5)]]], 'Click', 42); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(5, 5), (5, -5), (-5, -5), (-5, 5)], [(1, 3), (1, 1), (3, 1), (3, -1), (1, -1), (1, -3), (-1, -3), (-1, -1), (-3, -1), (-3, 1), (-1, 1), (-1, 3)]]], 'House', 314159); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'Click East', 421); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(-1, 1), (1, 1), (1, 3), (-1, 3)]]], 'Click North', 422); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(-3, 1), (-3, -1), (0, -1), (0, 1)]]], 'Click South', 423); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(-1, -1), (1, -1), (1, -3), (-1, -3)]]], 'Click West', 424); - -DROP TABLE IF EXISTS test_01037.points; - -CREATE TABLE test_01037.points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory; -INSERT INTO test_01037.points VALUES (0.1, 0.0, 112, 'aax'); -INSERT INTO test_01037.points VALUES (-0.1, 0.0, 113, 'aay'); -INSERT INTO test_01037.points VALUES (0.0, 1.1, 114, 'aaz'); -INSERT INTO test_01037.points VALUES (0.0, -1.1, 115, 'aat'); -INSERT INTO test_01037.points VALUES (3.0, 3.0, 22, 'bb'); -INSERT INTO test_01037.points VALUES (5.0, 6.0, 33, 'cc'); -INSERT INTO test_01037.points VALUES (-100.0, -42.0, 44, 'dd'); -INSERT INTO test_01037.points VALUES (7.01, 7.01, 55, 'ee'); -INSERT INTO test_01037.points VALUES (0.99, 2.99, 66, 'ee'); -INSERT INTO test_01037.points VALUES (1.0, 0.0, 771, 'ffa'); -INSERT INTO test_01037.points VALUES (-1.0, 0.0, 772, 'ffb'); -INSERT INTO test_01037.points VALUES (0.0, 2.0, 773, 'ffc'); -INSERT INTO test_01037.points VALUES (0.0, -2.0, 774, 'ffd'); +CREATE TABLE points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory; +INSERT INTO points VALUES (0.1, 0.0, 112, 'aax'); +INSERT INTO points VALUES (-0.1, 0.0, 113, 'aay'); +INSERT INTO points VALUES (0.0, 1.1, 114, 'aaz'); +INSERT INTO points VALUES (0.0, -1.1, 115, 'aat'); +INSERT INTO points VALUES (3.0, 3.0, 22, 'bb'); +INSERT INTO points VALUES (5.0, 6.0, 33, 'cc'); +INSERT INTO points VALUES (-100.0, -42.0, 44, 'dd'); +INSERT INTO points VALUES (7.01, 7.01, 55, 'ee'); +INSERT INTO points VALUES (0.99, 2.99, 66, 'ee'); +INSERT INTO points VALUES (1.0, 0.0, 771, 'ffa'); +INSERT INTO points VALUES (-1.0, 0.0, 772, 'ffb'); +INSERT INTO points VALUES (0.0, 2.0, 773, 'ffc'); +INSERT INTO points VALUES (0.0, -2.0, 774, 'ffd'); " - declare -a SearchTypes=("POLYGON" "POLYGON_SIMPLE" "POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") for type in "${SearchTypes[@]}"; @@ -58,63 +54,62 @@ do outputFile="${TMP_DIR}/results${type}.out" $CLICKHOUSE_CLIENT -n --query=" - DROP DICTIONARY IF EXISTS test_01037.dict_array; - CREATE DICTIONARY test_01037.dict_array + DROP DICTIONARY IF EXISTS dict_array; + CREATE DICTIONARY dict_array ( key Array(Array(Array(Array(Float64)))), name String DEFAULT 'qqq', value UInt64 DEFAULT 101 ) PRIMARY KEY key - SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037')) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB currentDatabase())) LIFETIME(0) LAYOUT($type()); - DROP DICTIONARY IF EXISTS test_01037.dict_tuple; + DROP DICTIONARY IF EXISTS dict_tuple; - CREATE DICTIONARY test_01037.dict_tuple + CREATE DICTIONARY dict_tuple ( key Array(Array(Array(Tuple(Float64, Float64)))), name String DEFAULT 'qqq', value UInt64 DEFAULT 101 ) PRIMARY KEY key - SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_tuple' PASSWORD '' DB 'test_01037')) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_tuple' PASSWORD '' DB currentDatabase())) LIFETIME(0) LAYOUT($type()); - select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + select 'dictGet', 'dict_array' as dict_name, tuple(x, y) as key, dictGet(dict_name, 'name', key), - dictGet(dict_name, 'value', key) from test_01037.points order by x, y; - select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'value', key) from points order by x, y; + select 'dictGetOrDefault', 'dict_array' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, 'www'), - dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y; - select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from points order by x, y; + select 'dictGetOrDefault', 'dict_array' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, def_s), - dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y; - select 'dictGet', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'value', key, def_i) from points order by x, y; + select 'dictGet', 'dict_tuple' as dict_name, tuple(x, y) as key, dictGet(dict_name, 'name', key), - dictGet(dict_name, 'value', key) from test_01037.points order by x, y; - select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'value', key) from points order by x, y; + select 'dictGetOrDefault', 'dict_tuple' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, 'www'), - dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y; - select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from points order by x, y; + select 'dictGetOrDefault', 'dict_tuple' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, def_s), - dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y; - select 'dictHas', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, - dictHas(dict_name, key) from test_01037.points order by x, y; - select 'dictHas', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, - dictHas(dict_name, key) from test_01037.points order by x, y; + dictGetOrDefault(dict_name, 'value', key, def_i) from points order by x, y; + select 'dictHas', 'dict_array' as dict_name, tuple(x, y) as key, + dictHas(dict_name, key) from points order by x, y; + select 'dictHas', 'dict_tuple' as dict_name, tuple(x, y) as key, + dictHas(dict_name, key) from points order by x, y; " > "$outputFile" diff -q "${CURDIR}/01037_polygon_dicts_simple_functions.ans" "$outputFile" done $CLICKHOUSE_CLIENT -n --query=" -DROP DICTIONARY test_01037.dict_array; -DROP DICTIONARY test_01037.dict_tuple; -DROP TABLE test_01037.polygons_array; -DROP TABLE test_01037.polygons_tuple; -DROP TABLE test_01037.points; -DROP DATABASE test_01037; +DROP DICTIONARY dict_array; +DROP DICTIONARY dict_tuple; +DROP TABLE polygons_array; +DROP TABLE polygons_tuple; +DROP TABLE points; " diff --git a/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh b/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh index 1e754dce786..66732205f95 100755 --- a/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh +++ b/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh @@ -5,13 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS db_01038" - -$CLICKHOUSE_CLIENT --query "CREATE DATABASE db_01038" - $CLICKHOUSE_CLIENT --query " -CREATE TABLE db_01038.table_for_dict +CREATE TABLE ${CLICKHOUSE_DATABASE}.table_for_dict ( key_column UInt64, value Float64 @@ -19,34 +15,34 @@ CREATE TABLE db_01038.table_for_dict ENGINE = MergeTree() ORDER BY key_column" -$CLICKHOUSE_CLIENT --query "INSERT INTO db_01038.table_for_dict VALUES (1, 1.1)" +$CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table_for_dict VALUES (1, 1.1)" $CLICKHOUSE_CLIENT --query " -CREATE DICTIONARY db_01038.dict_with_zero_min_lifetime +CREATE DICTIONARY ${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime ( key_column UInt64, value Float64 DEFAULT 77.77 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'db_01038')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB '${CLICKHOUSE_DATABASE}')) LIFETIME(1) LAYOUT(FLAT())" -$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(1))" +$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(1))" -$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(2))" +$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(2))" -$CLICKHOUSE_CLIENT --query "INSERT INTO db_01038.table_for_dict VALUES (2, 2.2)" +$CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table_for_dict VALUES (2, 2.2)" function check() { - query_result=$($CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(2))") + query_result=$($CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(2))") while [ "$query_result" != "2.2" ] do - query_result=$($CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(2))") + query_result=$($CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(2))") done } @@ -55,8 +51,6 @@ export -f check; timeout 10 bash -c check -$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(1))" +$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(1))" -$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(2))" - -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS db_01038" +$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(2))" diff --git a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh index 6856f952a47..d558fbf465e 100755 --- a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh +++ b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh @@ -1,17 +1,12 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh - -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb_01041_01040" - -$CLICKHOUSE_CLIENT --query "CREATE DATABASE dictdb_01041_01040" - $CLICKHOUSE_CLIENT --query " -CREATE TABLE dictdb_01041_01040.dict_invalidate +CREATE TABLE dict_invalidate ENGINE = Memory AS SELECT 122 as dummy, @@ -20,31 +15,31 @@ FROM system.one" $CLICKHOUSE_CLIENT --query " -CREATE DICTIONARY dictdb_01041_01040.invalidate +CREATE DICTIONARY invalidate ( dummy UInt64, two UInt8 EXPRESSION dummy ) PRIMARY KEY dummy -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_invalidate' DB 'dictdb_01041_01040' INVALIDATE_QUERY 'select max(last_time) from dictdb_01041_01040.dict_invalidate')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_invalidate' DB currentDatabase() INVALIDATE_QUERY 'select max(last_time) from dict_invalidate')) LIFETIME(MIN 0 MAX 1) LAYOUT(FLAT())" -$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('dictdb_01041_01040.invalidate', 'two', toUInt64(122))" +$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('invalidate', 'two', toUInt64(122))" # No exception happened -$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" +$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" -$CLICKHOUSE_CLIENT --check_table_dependencies=0 --query "DROP TABLE dictdb_01041_01040.dict_invalidate" +$CLICKHOUSE_CLIENT --check_table_dependencies=0 --query "DROP TABLE dict_invalidate" function check_exception_detected() { - query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1) + query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1) while [ -z "$query_result" ] do - query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1) + query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1) sleep 0.1 done } @@ -53,10 +48,10 @@ function check_exception_detected() export -f check_exception_detected; timeout 30 bash -c check_exception_detected 2> /dev/null -$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1 | grep -Eo "dictdb_01041_01040.dict_invalidate.*UNKNOWN_TABLE" | wc -l +$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1 | grep -Eo "dict_invalidate.*UNKNOWN_TABLE" | wc -l $CLICKHOUSE_CLIENT --query " -CREATE TABLE dictdb_01041_01040.dict_invalidate +CREATE TABLE dict_invalidate ENGINE = Memory AS SELECT 133 as dummy, @@ -65,11 +60,11 @@ FROM system.one" function check_exception_fixed() { - query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1) + query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1) while [ "$query_result" ] do - query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1) + query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1) sleep 0.1 done } @@ -78,7 +73,5 @@ export -f check_exception_fixed; # it may take a while until dictionary reloads timeout 60 bash -c check_exception_fixed 2> /dev/null -$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1 -$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('dictdb_01041_01040.invalidate', 'two', toUInt64(133))" - -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb_01041_01040" +$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1 +$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('invalidate', 'two', toUInt64(133))" diff --git a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh index 9d34470c38d..2b075566ac3 100755 --- a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh +++ b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -8,41 +7,37 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -o pipefail # NOTE: dictionaries TTLs works with server timezone, so session_timeout cannot be used -$CLICKHOUSE_CLIENT --session_timezone '' --multiquery <<'EOF' -DROP DATABASE IF EXISTS dictdb_01042; -CREATE DATABASE dictdb_01042; -CREATE TABLE dictdb_01042.table(x Int64, y Int64, insert_time DateTime) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO dictdb_01042.table VALUES (12, 102, now()); +$CLICKHOUSE_CLIENT --session_timezone '' --multiquery < ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(12))" +$CLICKHOUSE_CLIENT --query "SELECT '12 -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(12))" -$CLICKHOUSE_CLIENT --query "INSERT INTO dictdb_01042.table VALUES (13, 103, now())" -$CLICKHOUSE_CLIENT --query "INSERT INTO dictdb_01042.table VALUES (14, 104, now() - INTERVAL 1 DAY)" +$CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table VALUES (13, 103, now())" +$CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table VALUES (14, 104, now() - INTERVAL 1 DAY)" -while [ "$(${CLICKHOUSE_CLIENT} --query "SELECT dictGetInt64('dictdb_01042.dict', 'y', toUInt64(13))")" = -1 ] +while [ "$(${CLICKHOUSE_CLIENT} --query "SELECT dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))")" = -1 ] do sleep 0.5 done -$CLICKHOUSE_CLIENT --query "SELECT '13 -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(13))" -$CLICKHOUSE_CLIENT --query "SELECT '14 -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(14))" +$CLICKHOUSE_CLIENT --query "SELECT '13 -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))" +$CLICKHOUSE_CLIENT --query "SELECT '14 -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(14))" -$CLICKHOUSE_CLIENT --query "SYSTEM RELOAD DICTIONARY 'dictdb_01042.dict'" +$CLICKHOUSE_CLIENT --query "SYSTEM RELOAD DICTIONARY '${CLICKHOUSE_DATABASE}.dict'" -$CLICKHOUSE_CLIENT --query "SELECT '12(r) -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(12))" -$CLICKHOUSE_CLIENT --query "SELECT '13(r) -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(13))" -$CLICKHOUSE_CLIENT --query "SELECT '14(r) -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(14))" - -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb_01042" +$CLICKHOUSE_CLIENT --query "SELECT '12(r) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(12))" +$CLICKHOUSE_CLIENT --query "SELECT '13(r) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))" +$CLICKHOUSE_CLIENT --query "SELECT '14(r) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(14))" diff --git a/tests/queries/0_stateless/01053_ssd_dictionary.sh b/tests/queries/0_stateless/01053_ssd_dictionary.sh index b49144c9b1a..00e5719a9a9 100755 --- a/tests/queries/0_stateless/01053_ssd_dictionary.sh +++ b/tests/queries/0_stateless/01053_ssd_dictionary.sh @@ -6,8 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -n --query=" DROP DATABASE IF EXISTS 01053_db; diff --git a/tests/queries/0_stateless/01055_compact_parts_1.sql b/tests/queries/0_stateless/01055_compact_parts_1.sql index ff5ab722e0f..72048c59a41 100644 --- a/tests/queries/0_stateless/01055_compact_parts_1.sql +++ b/tests/queries/0_stateless/01055_compact_parts_1.sql @@ -1,8 +1,3 @@ --- Tags: no-parallel - -drop table if exists mt_compact; -drop table if exists mt_compact_2; - create table mt_compact (a Int, s String) engine = MergeTree order by a partition by a settings index_granularity_bytes = 0; alter table mt_compact modify setting min_rows_for_wide_part = 1000; -- { serverError NOT_IMPLEMENTED } @@ -25,5 +20,3 @@ alter table mt_compact modify setting parts_to_delay_insert = 300; alter table mt_compact modify setting min_rows_for_wide_part = 0; show create table mt_compact; - -drop table mt_compact diff --git a/tests/queries/0_stateless/01060_avro.sh b/tests/queries/0_stateless/01060_avro.sh index 3c70927db25..6ed26c8565f 100755 --- a/tests/queries/0_stateless/01060_avro.sh +++ b/tests/queries/0_stateless/01060_avro.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest set -e @@ -69,9 +69,6 @@ cat "$DATA_DIR"/simple.null.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --out - - - # output echo '===' output diff --git a/tests/queries/0_stateless/01069_database_memory.sql b/tests/queries/0_stateless/01069_database_memory.sql index 5aab9175c58..5d2fa4ea11e 100644 --- a/tests/queries/0_stateless/01069_database_memory.sql +++ b/tests/queries/0_stateless/01069_database_memory.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP DATABASE IF EXISTS memory_01069; CREATE DATABASE memory_01069 ENGINE = Memory; SHOW CREATE DATABASE memory_01069; diff --git a/tests/queries/0_stateless/01070_mutations_with_dependencies.sql b/tests/queries/0_stateless/01070_mutations_with_dependencies.sql index 813ebf3f5a7..4d1cd54306c 100644 --- a/tests/queries/0_stateless/01070_mutations_with_dependencies.sql +++ b/tests/queries/0_stateless/01070_mutations_with_dependencies.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-s3-storage +-- Tags: no-parallel, no-object-storage -- With s3 policy TTL TO DISK 'default' doesn't work (because we have no default, only 's3') drop table if exists ttl; diff --git a/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh b/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh index 17068dcbdf9..dcd15718416 100755 --- a/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh +++ b/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-parallel +# Tags: race # This is a monkey test used to trigger sanitizers. @@ -7,11 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --query="DROP DATABASE IF EXISTS dictdb_01076;" -$CLICKHOUSE_CLIENT --query="CREATE DATABASE dictdb_01076;" - $CLICKHOUSE_CLIENT --query=" -CREATE TABLE dictdb_01076.table_datarace +CREATE TABLE ${CLICKHOUSE_DATABASE}.table_datarace ( key_column UUID, value Float64 @@ -21,17 +18,17 @@ ORDER BY key_column; " $CLICKHOUSE_CLIENT --query=" -INSERT INTO dictdb_01076.table_datarace VALUES ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 1.1), ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 2.2), ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 3.3); +INSERT INTO ${CLICKHOUSE_DATABASE}.table_datarace VALUES ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 1.1), ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 2.2), ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 3.3); " $CLICKHOUSE_CLIENT --query=" -CREATE DICTIONARY IF NOT EXISTS dictdb_01076.dict_datarace +CREATE DICTIONARY IF NOT EXISTS ${CLICKHOUSE_DATABASE}.dict_datarace ( key_column UInt64, value Float64 DEFAULT 77.77 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_datarace' DB 'dictdb_01076')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_datarace' DB '${CLICKHOUSE_DATABASE}')) LIFETIME(1) LAYOUT(CACHE(SIZE_IN_CELLS 10)); " @@ -41,7 +38,7 @@ function thread1() for _ in {1..50} do # This query will be ended with exception, because source dictionary has UUID as a key type. - $CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('dictdb_01076.dict_datarace', 'value', toUInt64(1));" + $CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_datarace', 'value', toUInt64(1));" done } @@ -51,7 +48,7 @@ function thread2() for _ in {1..50} do # This query will be ended with exception, because source dictionary has UUID as a key type. - $CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('dictdb_01076.dict_datarace', 'value', toUInt64(2));" + $CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_datarace', 'value', toUInt64(2));" done } @@ -67,6 +64,5 @@ wait echo OK -$CLICKHOUSE_CLIENT --query="DROP DICTIONARY dictdb_01076.dict_datarace;" -$CLICKHOUSE_CLIENT --query="DROP TABLE dictdb_01076.table_datarace;" -$CLICKHOUSE_CLIENT --query="DROP DATABASE dictdb_01076;" +$CLICKHOUSE_CLIENT --query="DROP DICTIONARY ${CLICKHOUSE_DATABASE}.dict_datarace;" +$CLICKHOUSE_CLIENT --query="DROP TABLE ${CLICKHOUSE_DATABASE}.table_datarace;" diff --git a/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql b/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql index 3a05e4507a2..166f44df2a7 100644 --- a/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql +++ b/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage -- Output slightly different plan drop table if exists t; diff --git a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh index bfdea95fa9e..f05a0fed965 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-parallel, no-fasttest +# Tags: zookeeper, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -63,7 +63,7 @@ export -f optimize_thread; export -f insert_thread; -TIMEOUT=30 +TIMEOUT=20 # Sometimes we detach and attach tables timeout $TIMEOUT bash -c alter_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh index ba8d89aad3c..399c9e488a4 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel, no-fasttest +# Tags: long, zookeeper, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -76,7 +76,7 @@ export -f insert_thread; export -f select_thread; -TIMEOUT=30 +TIMEOUT=20 # Selects should run successfully diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference index 708c5d9d994..26085389381 100644 --- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference +++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference @@ -1,3 +1,22 @@ +-- bitTestAny +0 1 +1 0 +2 1 +3 0 +4 1 +5 0 +6 1 +7 0 +-- bitTestAll +0 1 +1 0 +2 1 +3 0 +4 1 +5 0 +6 1 +7 0 +-- bitTest 0 1 1 0 2 1 @@ -6,98 +25,6 @@ 5 0 6 1 7 0 -8 0 -9 0 -10 0 -11 0 -12 0 -13 0 -14 0 -15 0 -16 0 -17 0 -18 0 -19 0 -20 0 -21 0 -22 0 -23 0 -24 0 -25 0 -26 0 -27 0 -28 0 -29 0 -30 0 -31 0 -32 0 -33 0 -34 0 -35 0 -36 0 -37 0 -38 0 -39 0 -40 0 -41 0 -42 0 -43 0 -44 0 -45 0 -46 0 -47 0 -48 0 -49 0 -50 0 -51 0 -52 0 -53 0 -54 0 -55 0 -56 0 -57 0 -58 0 -59 0 -60 0 -61 0 -62 0 -63 0 -64 0 -65 0 -66 0 -67 0 -68 0 -69 0 -70 0 -71 0 -72 0 -73 0 -74 0 -75 0 -76 0 -77 0 -78 0 -79 0 -80 0 -81 0 -82 0 -83 0 -84 0 -85 0 -86 0 -87 0 -88 0 -89 0 -90 0 -91 0 -92 0 -93 0 -94 0 -95 0 -96 0 -97 0 -98 0 -99 0 0 1 1 0 2 1 @@ -107,94 +34,10 @@ 6 1 7 0 8 1 -9 1 +9 0 10 1 -11 1 +11 0 12 1 -13 1 +13 0 14 1 -15 1 -16 1 -17 1 -18 1 -19 1 -20 1 -21 1 -22 1 -23 1 -24 1 -25 1 -26 1 -27 1 -28 1 -29 1 -30 1 -31 1 -32 1 -33 1 -34 1 -35 1 -36 1 -37 1 -38 1 -39 1 -40 1 -41 1 -42 1 -43 1 -44 1 -45 1 -46 1 -47 1 -48 1 -49 1 -50 1 -51 1 -52 1 -53 1 -54 1 -55 1 -56 1 -57 1 -58 1 -59 1 -60 1 -61 1 -62 1 -63 1 -64 1 -65 1 -66 1 -67 1 -68 1 -69 1 -70 1 -71 1 -72 1 -73 1 -74 1 -75 1 -76 1 -77 1 -78 1 -79 1 -80 1 -81 1 -82 1 -83 1 -84 1 -85 1 -86 1 -87 1 -88 1 -89 1 -90 1 -91 1 -92 1 -93 1 -94 1 -95 1 -96 1 -97 1 -98 1 -99 1 +15 0 diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql index 82e2c5a2380..e741cb249d0 100644 --- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql +++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql @@ -1,2 +1,13 @@ -SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(100); -SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(100); +SELECT '-- bitTestAny'; +SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); +SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } + +SELECT '-- bitTestAll'; +SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); +SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } + +SELECT '-- bitTest'; +SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); +SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } +SELECT number, bitTest(toUInt16(1 + 4 + 16 + 64 + 256 + 1024 + 4096 + 16384 + 65536), number) FROM numbers(16); +SELECT -number, bitTest(toUInt16(1), -number) FROM numbers(8); -- { serverError PARAMETER_OUT_OF_BOUND } diff --git a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql index bdfbf2a47cf..697843be27f 100644 --- a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql +++ b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql @@ -28,7 +28,7 @@ CREATE TABLE url (n UInt64, col String) ENGINE=URL ( replace ( - 'https://localhost:8443/?query=' || 'select n, _table from ' || currentDatabase() || '.merge format CSV', ' ', '+' + 'https://localhost:' || getServerPort('https_port') || '/?query=' || 'select n, _table from ' || currentDatabase() || '.merge format CSV', ' ', '+' ), CSV ); @@ -39,7 +39,7 @@ CREATE VIEW view AS SELECT toInt64(n) as n FROM (SELECT toString(n) as n from me SELECT nonexistentsomething; -- { serverError UNKNOWN_IDENTIFIER } CREATE DICTIONARY dict (n UInt64, col String DEFAULT '42') PRIMARY KEY n -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9440 SECURE 1 USER 'default' TABLE 'url')) LIFETIME(1) LAYOUT(CACHE(SIZE_IN_CELLS 1)); +SOURCE(CLICKHOUSE(HOST 'localhost' PORT getServerPort('tcp_port_secure') SECURE 1 USER 'default' TABLE 'url')) LIFETIME(1) LAYOUT(CACHE(SIZE_IN_CELLS 1)); -- dict --> url --> merge |-> distributed -> file (1) -- |-> distributed_tf -> buffer -> file (1) diff --git a/tests/queries/0_stateless/01098_msgpack_format.sh b/tests/queries/0_stateless/01098_msgpack_format.sh index e2ae026eb27..30956ac6c7f 100755 --- a/tests/queries/0_stateless/01098_msgpack_format.sh +++ b/tests/queries/0_stateless/01098_msgpack_format.sh @@ -1,14 +1,11 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - - $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS msgpack"; $CLICKHOUSE_CLIENT --query="CREATE TABLE msgpack (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, date Date, datetime DateTime('Asia/Istanbul'), datetime64 DateTime64(3, 'Asia/Istanbul'), array Array(UInt32)) ENGINE = Memory"; diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index 91f7a276ea3..90128d7a8ad 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -87,7 +87,7 @@ function insert() -TIMEOUT=30 +TIMEOUT=20 create_db $TIMEOUT & sync_db $TIMEOUT & diff --git a/tests/queries/0_stateless/01113_local_dictionary_type_conversion.sql b/tests/queries/0_stateless/01113_local_dictionary_type_conversion.sql index 65a03993295..1dc727930ab 100644 --- a/tests/queries/0_stateless/01113_local_dictionary_type_conversion.sql +++ b/tests/queries/0_stateless/01113_local_dictionary_type_conversion.sql @@ -1,29 +1,21 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - -CREATE TABLE database_for_dict.table_for_dict ( +CREATE TABLE table_for_dict ( CompanyID String, OSType Enum('UNKNOWN' = 0, 'WINDOWS' = 1, 'LINUX' = 2, 'ANDROID' = 3, 'MAC' = 4), SomeID Int32 ) ENGINE = Memory(); -INSERT INTO database_for_dict.table_for_dict VALUES ('First', 'WINDOWS', 1), ('Second', 'LINUX', 2); +INSERT INTO table_for_dict VALUES ('First', 'WINDOWS', 1), ('Second', 'LINUX', 2); -CREATE DICTIONARY database_for_dict.dict_with_conversion +CREATE DICTIONARY dict_with_conversion ( CompanyID String DEFAULT '', OSType String DEFAULT '', SomeID Int32 DEFAULT 0 ) PRIMARY KEY CompanyID -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(MIN 1 MAX 20) LAYOUT(COMPLEX_KEY_HASHED()); -SELECT * FROM database_for_dict.dict_with_conversion ORDER BY CompanyID; - -DROP DATABASE IF EXISTS database_for_dict; +SELECT * FROM dict_with_conversion ORDER BY CompanyID; diff --git a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index 1b1f064ae0b..fed76727a27 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest # Tag no-fasttest: 45 seconds running # Creation of a database with Ordinary engine emits a warning. diff --git a/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql b/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql index 3379acf4d7b..783a728e336 100644 --- a/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql +++ b/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-fasttest DROP DATABASE IF EXISTS conv_main; CREATE DATABASE conv_main ENGINE = MySQL('127.0.0.1:3456', conv_main, 'metrika', 'password'); -- { serverError CANNOT_CREATE_DATABASE } diff --git a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql index a324d278c12..6a818d94a58 100644 --- a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql +++ b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql @@ -1,11 +1,3 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - -use database_for_dict; - CREATE TABLE date_table ( id UInt32, @@ -24,7 +16,7 @@ CREATE DICTIONARY somedict end Date ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB currentDatabase())) LAYOUT(RANGE_HASHED()) RANGE (MIN start MAX end) LIFETIME(MIN 300 MAX 360); @@ -35,5 +27,3 @@ SELECT * from somedict; SELECT 1 FROM somedict; SHOW TABLES; - -DROP DATABASE database_for_dict; diff --git a/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql b/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql index 2a1d04e6074..78632ab2463 100644 --- a/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql +++ b/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql @@ -1,6 +1,3 @@ --- Tags: no-parallel - -DROP TABLE IF EXISTS mt; set allow_deprecated_syntax_for_merge_tree=1; CREATE TABLE mt (d Date, x String) ENGINE = MergeTree(d, x, 8192); INSERT INTO mt VALUES ('2106-02-07', 'Hello'), ('1970-01-01', 'World'); diff --git a/tests/queries/0_stateless/01144_multiword_data_types.sql b/tests/queries/0_stateless/01144_multiword_data_types.sql index cc380f82d63..56def658ae0 100644 --- a/tests/queries/0_stateless/01144_multiword_data_types.sql +++ b/tests/queries/0_stateless/01144_multiword_data_types.sql @@ -23,7 +23,7 @@ CREATE TABLE multiword_types ( SHOW CREATE TABLE multiword_types; INSERT INTO multiword_types(a) VALUES (1); -SELECT toTypeName((*,)) FROM multiword_types; +SELECT toTypeName((*,)) FROM multiword_types SETTINGS enable_named_columns_in_function_tuple = 0; CREATE TABLE unsigned_types ( a TINYINT SIGNED, @@ -43,7 +43,7 @@ CREATE TABLE unsigned_types ( SHOW CREATE TABLE unsigned_types; INSERT INTO unsigned_types(a) VALUES (1); -SELECT toTypeName((*,)) FROM unsigned_types; +SELECT toTypeName((*,)) FROM unsigned_types SETTINGS enable_named_columns_in_function_tuple = 0; SELECT CAST('42' AS DOUBLE PRECISION), CAST(42, 'NATIONAL CHARACTER VARYING'), CAST(-1 AS tinyint UnSiGnEd), CAST(65535, ' sMaLlInT signed '); diff --git a/tests/queries/0_stateless/01154_move_partition_long.sh b/tests/queries/0_stateless/01154_move_partition_long.sh index 24ec58c9c17..bdffa028846 100755 --- a/tests/queries/0_stateless/01154_move_partition_long.sh +++ b/tests/queries/0_stateless/01154_move_partition_long.sh @@ -107,7 +107,7 @@ export -f drop_partition_thread; export -f optimize_thread; export -f drop_part_thread; -TIMEOUT=60 +TIMEOUT=40 #timeout $TIMEOUT bash -c "create_drop_thread ${engines[@]}" & timeout $TIMEOUT bash -c 'insert_thread src' & diff --git a/tests/queries/0_stateless/01158_zookeeper_log_long.sql b/tests/queries/0_stateless/01158_zookeeper_log_long.sql index 55d4162fc48..804cdf48fb6 100644 --- a/tests/queries/0_stateless/01158_zookeeper_log_long.sql +++ b/tests/queries/0_stateless/01158_zookeeper_log_long.sql @@ -29,14 +29,20 @@ select 'parts'; select type, has_watch, op_num, replace(path, toString(serverUUID()), ''), is_ephemeral, is_sequential, if(startsWith(path, '/clickhouse/sessions'), 1, version), requests_size, request_idx, error, watch_type, watch_state, path_created, stat_version, stat_cversion, stat_dataLength, stat_numChildren from system.zookeeper_log -where (session_id, xid) in (select session_id, xid from system.zookeeper_log where path='/test/01158/' || currentDatabase() || '/rmt/replicas/1/parts/all_0_0_0') +where (session_id, xid) in ( + select session_id, xid from system.zookeeper_log where path='/test/01158/' || currentDatabase() || '/rmt/replicas/1/parts/all_0_0_0' + and (query_id='' or query_id in (select query_id from system.query_log where current_database=currentDatabase() and event_date>=yesterday())) +) order by xid, type, request_idx; select 'blocks'; select type, has_watch, op_num, path, is_ephemeral, is_sequential, version, requests_size, request_idx, error, watch_type, watch_state, path_created, stat_version, stat_cversion, stat_dataLength, stat_numChildren from system.zookeeper_log -where (session_id, xid) in (select session_id, xid from system.zookeeper_log where path like '/test/01158/' || currentDatabase() || '/rmt/blocks/%' and op_num not in (1, 12, 500)) +where (session_id, xid) in ( + select session_id, xid from system.zookeeper_log where path like '/test/01158/' || currentDatabase() || '/rmt/blocks/%' and op_num not in (1, 12, 500) + and (query_id='' or query_id in (select query_id from system.query_log where current_database=currentDatabase() and event_date>=yesterday())) +) order by xid, type, request_idx; drop table rmt sync; diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh index 8344bb6f426..2ab7f883367 100755 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-ordinary-database, no-debug +# Tags: long, no-ordinary-database, no-debug # Test is too heavy, avoid parallel run in Flaky Check # shellcheck disable=SC2119 diff --git a/tests/queries/0_stateless/01185_create_or_replace_table.sql b/tests/queries/0_stateless/01185_create_or_replace_table.sql index 11759d0bb0c..801a775e024 100644 --- a/tests/queries/0_stateless/01185_create_or_replace_table.sql +++ b/tests/queries/0_stateless/01185_create_or_replace_table.sql @@ -1,4 +1,4 @@ --- Tags: no-ordinary-database, no-parallel +-- Tags: no-ordinary-database drop table if exists t1; diff --git a/tests/queries/0_stateless/01188_attach_table_from_path.sql b/tests/queries/0_stateless/01188_attach_table_from_path.sql index d1b9493b6c2..026979a0132 100644 --- a/tests/queries/0_stateless/01188_attach_table_from_path.sql +++ b/tests/queries/0_stateless/01188_attach_table_from_path.sql @@ -1,4 +1,4 @@ --- Tags: no-replicated-database, no-parallel +-- Tags: no-replicated-database drop table if exists test; drop table if exists file; diff --git a/tests/queries/0_stateless/01200_mutations_memory_consumption.sql b/tests/queries/0_stateless/01200_mutations_memory_consumption.sql index 5019abc38ab..f2d071961ee 100644 --- a/tests/queries/0_stateless/01200_mutations_memory_consumption.sql +++ b/tests/queries/0_stateless/01200_mutations_memory_consumption.sql @@ -1,4 +1,4 @@ --- Tags: no-debug, no-parallel, long, no-s3-storage, no-random-settings, no-random-merge-tree-settings +-- Tags: no-debug, no-parallel, long, no-object-storage, no-random-settings, no-random-merge-tree-settings SET optimize_trivial_insert_select = 1; DROP TABLE IF EXISTS table_with_single_pk; diff --git a/tests/queries/0_stateless/01202_array_auc_special.reference b/tests/queries/0_stateless/01202_array_auc_special.reference index 85c230fba58..8f3f0cf1efe 100644 --- a/tests/queries/0_stateless/01202_array_auc_special.reference +++ b/tests/queries/0_stateless/01202_array_auc_special.reference @@ -1,9 +1,9 @@ nan nan nan -0 -1 -0 0.5 1 -0.5 +0 +0.75 +1 +0.75 diff --git a/tests/queries/0_stateless/01221_system_settings.sql b/tests/queries/0_stateless/01221_system_settings.sql index fcffd6c45fe..da0204b37bd 100644 --- a/tests/queries/0_stateless/01221_system_settings.sql +++ b/tests/queries/0_stateless/01221_system_settings.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage select * from system.settings where name = 'send_timeout'; select * from system.merge_tree_settings order by length(description) limit 1; diff --git a/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql b/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql index 3e497f9e3a4..a0cacd8bc7a 100644 --- a/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql +++ b/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql @@ -1,22 +1,15 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS dict_db_01225; -CREATE DATABASE dict_db_01225; - -CREATE TABLE dict_db_01225.dict_data (key UInt64, val UInt64) Engine=Memory(); -CREATE DICTIONARY dict_db_01225.dict +CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory(); +CREATE DICTIONARY dict ( key UInt64 DEFAULT 0, val UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01225')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); -SYSTEM RELOAD DICTIONARY dict_db_01225.dict; +SYSTEM RELOAD DICTIONARY dict; -DROP TABLE dict_db_01225.dict; -- { serverError CANNOT_DETACH_DICTIONARY_AS_TABLE } -DROP DICTIONARY dict_db_01225.dict; - -DROP DATABASE dict_db_01225; +DROP TABLE dict; -- { serverError CANNOT_DETACH_DICTIONARY_AS_TABLE } +DROP DICTIONARY dict; diff --git a/tests/queries/0_stateless/01232_untuple.reference b/tests/queries/0_stateless/01232_untuple.reference index 0358cde1354..3cd8eaa5611 100644 --- a/tests/queries/0_stateless/01232_untuple.reference +++ b/tests/queries/0_stateless/01232_untuple.reference @@ -2,7 +2,7 @@ hello 1 3 world 9 9 (0,1) -key tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'1\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'2\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'3\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'4\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'5\') +key tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v1\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v2\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v3\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v4\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v5\') 1 20 20 10 20 30 2 11 20 10 20 30 3 70 20 10 20 30 diff --git a/tests/queries/0_stateless/01232_untuple.sql b/tests/queries/0_stateless/01232_untuple.sql index ccefd13a772..391d08ab859 100644 --- a/tests/queries/0_stateless/01232_untuple.sql +++ b/tests/queries/0_stateless/01232_untuple.sql @@ -1,4 +1,5 @@ SET allow_experimental_analyzer = 1; +SET enable_named_columns_in_function_tuple = 1; select untuple((* except (b),)) from (select 1 a, 2 b, 3 c); select 'hello', untuple((* except (b),)), 'world' from (select 1 a, 2 b, 3 c); diff --git a/tests/queries/0_stateless/01246_buffer_flush.sh b/tests/queries/0_stateless/01246_buffer_flush.sh new file mode 100755 index 00000000000..1ca953c80d9 --- /dev/null +++ b/tests/queries/0_stateless/01246_buffer_flush.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +function elapsed_sec() +{ + local expr=$1 && shift + local start end + start=$(date +%s.%N) + while ! eval "$expr"; do + sleep 0.5 + done + end=$(date +%s.%N) + $CLICKHOUSE_LOCAL -q "select floor($end-$start)" +} + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data_01256; + drop table if exists buffer_01256; + + create table data_01256 as system.numbers Engine=Memory(); +" + +echo "min" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 2, 100, /* time */ + 4, 100, /* rows */ + 1, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 5; + select count() from data_01256; +" +sec=$(elapsed_sec '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 5 ]]') +[[ $sec -ge 2 ]] || echo "Buffer flushed too early, min_time=2, flushed after $sec sec" +[[ $sec -lt 100 ]] || echo "Buffer flushed too late, max_time=100, flushed after $sec sec" +$CLICKHOUSE_CLIENT -q "select count() from data_01256" +$CLICKHOUSE_CLIENT -q "drop table buffer_01256" + +echo "max" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 100, 2, /* time */ + 0, 100, /* rows */ + 0, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 5; + select count() from data_01256; +" +sec=$(elapsed_sec '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 10 ]]') +[[ $sec -ge 2 ]] || echo "Buffer flushed too early, max_time=2, flushed after $sec sec" +$CLICKHOUSE_CLIENT -q "select count() from data_01256" +$CLICKHOUSE_CLIENT -q "drop table buffer_01256" + +echo "direct" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 100, 100, /* time */ + 0, 9, /* rows */ + 0, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 10; + select count() from data_01256; +" + +echo "drop" +$CLICKHOUSE_CLIENT -nm -q " + insert into buffer_01256 select * from system.numbers limit 10; + drop table if exists buffer_01256; + select count() from data_01256; +" + +$CLICKHOUSE_CLIENT -q "drop table data_01256" diff --git a/tests/queries/0_stateless/01246_buffer_flush.sql b/tests/queries/0_stateless/01246_buffer_flush.sql deleted file mode 100644 index 66f93371c29..00000000000 --- a/tests/queries/0_stateless/01246_buffer_flush.sql +++ /dev/null @@ -1,50 +0,0 @@ --- Tags: no-fasttest - -SET function_sleep_max_microseconds_per_block = 4000000; - -drop table if exists data_01256; -drop table if exists buffer_01256; - -create table data_01256 as system.numbers Engine=Memory(); - -select 'min'; -create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 5, 100, /* time */ - 4, 100, /* rows */ - 1, 1e6 /* bytes */ -); -insert into buffer_01256 select * from system.numbers limit 5; -select count() from data_01256; --- It is enough to ensure that the buffer will be flushed earlier then 2*min_time (10 sec) -select sleepEachRow(9) FORMAT Null SETTINGS function_sleep_max_microseconds_per_block=10e6; -select count() from data_01256; -drop table buffer_01256; - -select 'max'; -create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 100, 2, /* time */ - 0, 100, /* rows */ - 0, 1e6 /* bytes */ -); -insert into buffer_01256 select * from system.numbers limit 5; -select count() from data_01256; --- sleep 2 (min time) + 1 (round up) + bias (1) = 4 -select sleepEachRow(2) from numbers(2) FORMAT Null; -select count() from data_01256; -drop table buffer_01256; - -select 'direct'; -create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 100, 100, /* time */ - 0, 9, /* rows */ - 0, 1e6 /* bytes */ -); -insert into buffer_01256 select * from system.numbers limit 10; -select count() from data_01256; - -select 'drop'; -insert into buffer_01256 select * from system.numbers limit 10; -drop table if exists buffer_01256; -select count() from data_01256; - -drop table data_01256; diff --git a/tests/queries/0_stateless/01254_dict_create_without_db.sql b/tests/queries/0_stateless/01254_dict_create_without_db.sql index 65a2ab52d23..2d4da5af9a9 100644 --- a/tests/queries/0_stateless/01254_dict_create_without_db.sql +++ b/tests/queries/0_stateless/01254_dict_create_without_db.sql @@ -1,9 +1,3 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS dict_db_01254; -CREATE DATABASE dict_db_01254; -USE dict_db_01254; - CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory(); CREATE DICTIONARY dict ( @@ -11,15 +5,12 @@ CREATE DICTIONARY dict val UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01254')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; -SYSTEM RELOAD DICTIONARY dict_db_01254.dict; -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; -SELECT dictGetUInt64('dict_db_01254.dict', 'val', toUInt64(0)); -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; - -USE system; -DROP DATABASE dict_db_01254; +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SYSTEM RELOAD DICTIONARY dict; +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SELECT dictGetUInt64('dict', 'val', toUInt64(0)); +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; diff --git a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql index 206ddeac612..11473c6ce32 100644 --- a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql +++ b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql @@ -1,26 +1,19 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS dict_db_01254; -CREATE DATABASE dict_db_01254; - -CREATE TABLE dict_db_01254.dict_data (key UInt64, val UInt64) Engine=Memory(); -CREATE DICTIONARY dict_db_01254.dict +CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory(); +CREATE DICTIONARY dict ( key UInt64 DEFAULT 0, val UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01254')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); -DETACH DATABASE dict_db_01254; -ATTACH DATABASE dict_db_01254; +DETACH DATABASE {CLICKHOUSE_DATABASE:Identifier}; +ATTACH DATABASE {CLICKHOUSE_DATABASE:Identifier}; -SELECT COALESCE((SELECT status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict')::Nullable(String), 'NOT_LOADED'); -SYSTEM RELOAD DICTIONARY dict_db_01254.dict; -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; -SELECT dictGetUInt64('dict_db_01254.dict', 'val', toUInt64(0)); -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; - -DROP DATABASE dict_db_01254; +SELECT COALESCE((SELECT status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict')::Nullable(String), 'NOT_LOADED'); +SYSTEM RELOAD DICTIONARY dict; +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SELECT dictGetUInt64('dict', 'val', toUInt64(0)); +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; diff --git a/tests/queries/0_stateless/01259_dictionary_custom_settings_ddl.sql b/tests/queries/0_stateless/01259_dictionary_custom_settings_ddl.sql index 432256d33c2..be56806f8d6 100644 --- a/tests/queries/0_stateless/01259_dictionary_custom_settings_ddl.sql +++ b/tests/queries/0_stateless/01259_dictionary_custom_settings_ddl.sql @@ -1,12 +1,6 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-fasttest -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - -DROP TABLE IF EXISTS database_for_dict.table_for_dict; - -CREATE TABLE database_for_dict.table_for_dict +CREATE TABLE table_for_dict ( key_column UInt64, second_column UInt64, @@ -15,7 +9,7 @@ CREATE TABLE database_for_dict.table_for_dict ENGINE = MergeTree() ORDER BY key_column; -INSERT INTO database_for_dict.table_for_dict VALUES (100500, 10000000, 'Hello world'); +INSERT INTO table_for_dict VALUES (100500, 10000000, 'Hello world'); DROP DATABASE IF EXISTS ordinary_db; @@ -30,7 +24,7 @@ CREATE DICTIONARY ordinary_db.dict1 third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()) SETTINGS(max_result_bytes=1); @@ -40,10 +34,6 @@ SELECT dictGetUInt64('ordinary_db.dict1', 'second_column', toUInt64(100500)); -- SELECT 'END'; -DROP DICTIONARY IF EXISTS ordinary_db.dict1; - DROP DATABASE IF EXISTS ordinary_db; -DROP TABLE IF EXISTS database_for_dict.table_for_dict; - -DROP DATABASE IF EXISTS database_for_dict; +DROP TABLE IF EXISTS table_for_dict; diff --git a/tests/queries/0_stateless/01268_procfs_metrics.sh b/tests/queries/0_stateless/01268_procfs_metrics.sh index 4f09d197596..7d6389bb86e 100755 --- a/tests/queries/0_stateless/01268_procfs_metrics.sh +++ b/tests/queries/0_stateless/01268_procfs_metrics.sh @@ -15,7 +15,7 @@ tmp_path=$(mktemp "$CURDIR/01268_procfs_metrics.XXXXXX") trap 'rm -f $tmp_path' EXIT truncate -s1025 "$tmp_path" -$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events -q "SELECT * FROM file('$tmp_path', 'LineAsString') FORMAT Null" |& grep -m1 -F -o -e OSReadChars +$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events --storage_file_read_method=pread -q "SELECT * FROM file('$tmp_path', 'LineAsString') FORMAT Null" |& grep -m1 -F -o -e OSReadChars # NOTE: that OSCPUVirtualTimeMicroseconds is in microseconds, so 1e6 is not enough. $CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events -q "SELECT * FROM numbers(1e8) FORMAT Null" |& grep -m1 -F -o -e OSCPUVirtualTimeMicroseconds exit 0 diff --git a/tests/queries/0_stateless/01269_alias_type_differs.sql b/tests/queries/0_stateless/01269_alias_type_differs.sql index 64abcf9e367..b78e46f62c8 100644 --- a/tests/queries/0_stateless/01269_alias_type_differs.sql +++ b/tests/queries/0_stateless/01269_alias_type_differs.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS data_01269; CREATE TABLE data_01269 ( diff --git a/tests/queries/0_stateless/01272_suspicious_codecs.sql b/tests/queries/0_stateless/01272_suspicious_codecs.sql index 082a8d08675..1c1d7b58dd0 100644 --- a/tests/queries/0_stateless/01272_suspicious_codecs.sql +++ b/tests/queries/0_stateless/01272_suspicious_codecs.sql @@ -1,9 +1,5 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS codecs; --- test what should work - CREATE TABLE codecs ( a UInt8 CODEC(LZ4), diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index a9801e3b910..f5f31c4a563 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -10,7 +10,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -34,7 +34,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -58,7 +58,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -82,7 +82,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -106,7 +106,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -130,7 +130,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -154,7 +154,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -178,7 +178,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 index 047b1cc3ee7..5918035e9c3 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 +++ b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 @@ -1,5 +1,5 @@ --- Tags: no-s3-storage, no-parallel, no-fasttest --- no-s3-storage: s3 has 20 more threads +-- Tags: no-object-storage, no-parallel, no-fasttest +-- no-object-storage: s3 has 20 more threads -- no-parallel: it checks the number of threads, which can be lowered in presence of other queries -- avoid settings randomization by clickhouse-test @@ -28,7 +28,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select={{ optimize_trivial_insert_select }}, max_insert_threads={{ max_insert_threads }}; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and diff --git a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh index fb7bf5c6fc1..9a80820dd58 100755 --- a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh +++ b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" set allow_deprecated_database_ordinary=1; DROP DATABASE IF EXISTS 01280_db; diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index e83e49dffef..33b8f413fd5 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings, no-s3-storage, no-msan +# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings, no-object-storage, no-msan # Tag no-fasttest: max_memory_usage_for_user can interfere another queries running concurrently # Regression for MemoryTracker that had been incorrectly accounted diff --git a/tests/queries/0_stateless/01293_optimize_final_force.sh b/tests/queries/0_stateless/01293_optimize_final_force.sh index d3d3d3e1ac5..e838af8af9b 100755 --- a/tests/queries/0_stateless/01293_optimize_final_force.sh +++ b/tests/queries/0_stateless/01293_optimize_final_force.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, long, no-debug, no-s3-storage +# Tags: no-fasttest, long, no-debug, no-object-storage # This test is too slow with S3 storage and debug modes. CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/01294_lazy_database_concurrent_recreate_reattach_and_show_tables_long.sh b/tests/queries/0_stateless/01294_lazy_database_concurrent_recreate_reattach_and_show_tables_long.sh index 3c11dc5f772..21f46a34514 100755 --- a/tests/queries/0_stateless/01294_lazy_database_concurrent_recreate_reattach_and_show_tables_long.sh +++ b/tests/queries/0_stateless/01294_lazy_database_concurrent_recreate_reattach_and_show_tables_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-fasttest +# Tags: long, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -91,7 +91,7 @@ ${CLICKHOUSE_CLIENT} -n -q " " -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c recreate_lazy_func1 2> /dev/null & timeout $TIMEOUT bash -c recreate_lazy_func2 2> /dev/null & diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh index 2d18c45406c..47fe7a9c7d9 100755 --- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh index b9f1f81da1a..a521accb082 100755 --- a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh +++ b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index 97148dc268e..2e27c2f7728 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage-with-slow-build +# Tags: long, no-object-storage-with-slow-build CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01323_too_many_threads_bug.sql b/tests/queries/0_stateless/01323_too_many_threads_bug.sql index c377e2c7570..5bf282808c3 100644 --- a/tests/queries/0_stateless/01323_too_many_threads_bug.sql +++ b/tests/queries/0_stateless/01323_too_many_threads_bug.sql @@ -14,11 +14,11 @@ set log_queries = 1; select x from table_01323_many_parts limit 10 format Null; system flush logs; -select arrayUniq(thread_ids) <= 4 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; +select peak_threads_usage <= 4 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; select x from table_01323_many_parts order by x limit 10 format Null; system flush logs; -select arrayUniq(thread_ids) <= 36 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts order by x%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; +select peak_threads_usage <= 36 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts order by x%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; drop table if exists table_01323_many_parts; diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.sh b/tests/queries/0_stateless/01338_long_select_and_alter.sh index fcdfa2dec82..2b0709162a3 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh index 50ade3fad45..41e0a12f369 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel +# Tags: long, zookeeper CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql b/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql index 614629351ef..15c9ec16700 100644 --- a/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql +++ b/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_01343; CREATE TABLE test_01343 (x String) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; INSERT INTO test_01343 VALUES ('Hello, world'); diff --git a/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql index 2e5ec563641..76cb535dcb7 100644 --- a/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql +++ b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_01344; CREATE TABLE test_01344 (x String, INDEX idx (x) TYPE set(10) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; INSERT INTO test_01344 VALUES ('Hello, world'); diff --git a/tests/queries/0_stateless/01355_alter_column_with_order.sql b/tests/queries/0_stateless/01355_alter_column_with_order.sql index 0b1b4c42cce..405157fd891 100644 --- a/tests/queries/0_stateless/01355_alter_column_with_order.sql +++ b/tests/queries/0_stateless/01355_alter_column_with_order.sql @@ -1,28 +1,26 @@ --- Tags: no-parallel - -DROP TABLE IF EXISTS alter_test; +DROP TABLE IF EXISTS alter_01355; set allow_deprecated_syntax_for_merge_tree=1; -CREATE TABLE alter_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192); +CREATE TABLE alter_01355 (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192); -ALTER TABLE alter_test ADD COLUMN Added1 UInt32 FIRST; +ALTER TABLE alter_01355 ADD COLUMN Added1 UInt32 FIRST; -ALTER TABLE alter_test ADD COLUMN Added2 UInt32 AFTER NestedColumn; +ALTER TABLE alter_01355 ADD COLUMN Added2 UInt32 AFTER NestedColumn; -ALTER TABLE alter_test ADD COLUMN Added3 UInt32 AFTER ToDrop; +ALTER TABLE alter_01355 ADD COLUMN Added3 UInt32 AFTER ToDrop; -DESC alter_test; -DETACH TABLE alter_test; -ATTACH TABLE alter_test; -DESC alter_test; +DESC alter_01355; +DETACH TABLE alter_01355; +ATTACH TABLE alter_01355; +DESC alter_01355; -ALTER TABLE alter_test MODIFY COLUMN Added2 UInt32 FIRST; +ALTER TABLE alter_01355 MODIFY COLUMN Added2 UInt32 FIRST; -ALTER TABLE alter_test MODIFY COLUMN Added3 UInt32 AFTER CounterID; +ALTER TABLE alter_01355 MODIFY COLUMN Added3 UInt32 AFTER CounterID; -DESC alter_test; -DETACH TABLE alter_test; -ATTACH TABLE alter_test; -DESC alter_test; +DESC alter_01355; +DETACH TABLE alter_01355; +ATTACH TABLE alter_01355; +DESC alter_01355; -DROP TABLE IF EXISTS alter_test; +DROP TABLE IF EXISTS alter_01355; diff --git a/tests/queries/0_stateless/01355_ilike.sql b/tests/queries/0_stateless/01355_ilike.sql index 6bde62bf47e..1ceb878a5ef 100644 --- a/tests/queries/0_stateless/01355_ilike.sql +++ b/tests/queries/0_stateless/01355_ilike.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-fasttest SELECT 'Hello' ILIKE ''; SELECT 'Hello' ILIKE '%'; @@ -53,11 +53,7 @@ SELECT 'ощщЁё' ILIKE '%щ%'; SELECT 'ощЩЁё' ILIKE '%ё%'; SHOW TABLES NOT ILIKE '%'; -DROP DATABASE IF EXISTS test_01355; -CREATE DATABASE test_01355; -USE test_01355; CREATE TABLE test1 (x UInt8) ENGINE = Memory; CREATE TABLE test2 (x UInt8) ENGINE = Memory; SHOW TABLES ILIKE 'tES%'; SHOW TABLES NOT ILIKE 'TeS%'; -DROP DATABASE test_01355; diff --git a/tests/queries/0_stateless/01388_clear_all_columns.sql b/tests/queries/0_stateless/01388_clear_all_columns.sql index cc395aa7fb4..07b4fb3de90 100644 --- a/tests/queries/0_stateless/01388_clear_all_columns.sql +++ b/tests/queries/0_stateless/01388_clear_all_columns.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS test; CREATE TABLE test (x UInt8) ENGINE = MergeTree ORDER BY tuple(); INSERT INTO test (x) VALUES (1), (2), (3); diff --git a/tests/queries/0_stateless/01391_join_on_dict_crash.sql b/tests/queries/0_stateless/01391_join_on_dict_crash.sql index 854da04b334..e056e147501 100644 --- a/tests/queries/0_stateless/01391_join_on_dict_crash.sql +++ b/tests/queries/0_stateless/01391_join_on_dict_crash.sql @@ -1,13 +1,3 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS db_01391; -CREATE DATABASE db_01391; -USE db_01391; - -DROP TABLE IF EXISTS t; -DROP TABLE IF EXISTS d_src; -DROP DICTIONARY IF EXISTS d; - CREATE TABLE t (click_city_id UInt32, click_country_id UInt32) Engine = Memory; CREATE TABLE d_src (id UInt64, country_id UInt8, name String) Engine = Memory; @@ -16,14 +6,9 @@ INSERT INTO d_src VALUES (0, 0, 'n'); CREATE DICTIONARY d (id UInt32, country_id UInt8, name String) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' DB 'db_01391' table 'd_src')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' DB currentDatabase() table 'd_src')) LIFETIME(MIN 1 MAX 1) LAYOUT(HASHED()); SELECT click_country_id FROM t AS cc LEFT JOIN d ON toUInt32(d.id) = cc.click_city_id; SELECT click_country_id FROM t AS cc LEFT JOIN d ON d.country_id < 99 AND d.id = cc.click_city_id; - -DROP DICTIONARY d; -DROP TABLE t; -DROP TABLE d_src; -DROP DATABASE IF EXISTS db_01391; diff --git a/tests/queries/0_stateless/01392_column_resolve.sql b/tests/queries/0_stateless/01392_column_resolve.sql index 72b6af4576a..90a7d9b169a 100644 --- a/tests/queries/0_stateless/01392_column_resolve.sql +++ b/tests/queries/0_stateless/01392_column_resolve.sql @@ -1,16 +1,11 @@ --- Tags: no-parallel +CREATE TABLE tableConversion (conversionId String, value Nullable(Double)) ENGINE = Log(); +CREATE TABLE tableClick (clickId String, conversionId String, value Nullable(Double)) ENGINE = Log(); +CREATE TABLE leftjoin (id String) ENGINE = Log(); -DROP DATABASE IF EXISTS test_01392; -CREATE DATABASE test_01392; - -CREATE TABLE test_01392.tableConversion (conversionId String, value Nullable(Double)) ENGINE = Log(); -CREATE TABLE test_01392.tableClick (clickId String, conversionId String, value Nullable(Double)) ENGINE = Log(); -CREATE TABLE test_01392.leftjoin (id String) ENGINE = Log(); - -INSERT INTO test_01392.tableConversion(conversionId, value) VALUES ('Conversion 1', 1); -INSERT INTO test_01392.tableClick(clickId, conversionId, value) VALUES ('Click 1', 'Conversion 1', 14); -INSERT INTO test_01392.tableClick(clickId, conversionId, value) VALUES ('Click 2', 'Conversion 1', 15); -INSERT INTO test_01392.tableClick(clickId, conversionId, value) VALUES ('Click 3', 'Conversion 1', 16); +INSERT INTO tableConversion(conversionId, value) VALUES ('Conversion 1', 1); +INSERT INTO tableClick(clickId, conversionId, value) VALUES ('Click 1', 'Conversion 1', 14); +INSERT INTO tableClick(clickId, conversionId, value) VALUES ('Click 2', 'Conversion 1', 15); +INSERT INTO tableClick(clickId, conversionId, value) VALUES ('Click 3', 'Conversion 1', 16); SELECT conversion.conversionId AS myConversionId, @@ -18,19 +13,13 @@ SELECT click.myValue AS myValue FROM ( SELECT conversionId, value as myValue - FROM test_01392.tableConversion + FROM tableConversion ) AS conversion INNER JOIN ( SELECT clickId, conversionId, value as myValue - FROM test_01392.tableClick + FROM tableClick ) AS click ON click.conversionId = conversion.conversionId LEFT JOIN ( - SELECT * FROM test_01392.leftjoin + SELECT * FROM leftjoin ) AS dummy ON (dummy.id = conversion.conversionId) ORDER BY myValue; - -DROP TABLE test_01392.tableConversion; -DROP TABLE test_01392.tableClick; -DROP TABLE test_01392.leftjoin; - -DROP DATABASE test_01392; diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index 1c1eb4489ee..b81bb75891d 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: replica, no-debug, no-parallel +# Tags: replica, no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01412_cache_dictionary_race.sh b/tests/queries/0_stateless/01412_cache_dictionary_race.sh index 165a461193d..9aa39652021 100755 --- a/tests/queries/0_stateless/01412_cache_dictionary_race.sh +++ b/tests/queries/0_stateless/01412_cache_dictionary_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-parallel +# Tags: race CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -54,7 +54,7 @@ function drop_create_table_thread() export -f dict_get_thread; export -f drop_create_table_thread; -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/01415_sticking_mutations.sh b/tests/queries/0_stateless/01415_sticking_mutations.sh index 821c83fe728..b7c8768a65d 100755 --- a/tests/queries/0_stateless/01415_sticking_mutations.sh +++ b/tests/queries/0_stateless/01415_sticking_mutations.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-replicated-database, no-parallel +# Tags: no-replicated-database set -e diff --git a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh index 85fdf7ed764..9ba1fe93543 100755 --- a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh +++ b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-debug +# Tags: long, no-debug set -e @@ -27,7 +27,7 @@ function thread_ops() } export -f thread_ops -TIMEOUT=60 +TIMEOUT=30 thread_ops $TIMEOUT & wait diff --git a/tests/queries/0_stateless/01444_create_table_drop_database_race.sh b/tests/queries/0_stateless/01444_create_table_drop_database_race.sh index eb231e71525..ae74efa4e20 100755 --- a/tests/queries/0_stateless/01444_create_table_drop_database_race.sh +++ b/tests/queries/0_stateless/01444_create_table_drop_database_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-parallel +# Tags: race set -e @@ -8,20 +8,20 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # This test reproduces "Directory not empty" error in DROP DATABASE query. +export DB=test_$RANDOM function thread1() { while true; do -# ${CLICKHOUSE_CLIENT} --query="SHOW TABLES FROM test_01444" - ${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS test_01444" 2>&1| grep -F "Code: " | grep -Fv "Code: 219" - ${CLICKHOUSE_CLIENT} --query="CREATE DATABASE IF NOT EXISTS test_01444" + ${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS $DB" 2>&1| grep -F "Code: " | grep -Fv "Code: 219" + ${CLICKHOUSE_CLIENT} --query="CREATE DATABASE IF NOT EXISTS $DB" done } function thread2() { while true; do - ${CLICKHOUSE_CLIENT} --query="CREATE TABLE IF NOT EXISTS test_01444.t$RANDOM (x UInt8) ENGINE = MergeTree ORDER BY tuple()" 2>/dev/null + ${CLICKHOUSE_CLIENT} --query="CREATE TABLE IF NOT EXISTS $DB.t$RANDOM (x UInt8) ENGINE = MergeTree ORDER BY tuple()" 2>/dev/null done } @@ -36,4 +36,4 @@ timeout $TIMEOUT bash -c thread2 & wait -${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS test_01444" 2>&1| grep -F "Code: " | grep -Fv "Code: 219" || exit 0 +${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS ${DB}" 2>&1| grep -F "Code: " | grep -Fv "Code: 219" || exit 0 diff --git a/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh b/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh index d83343b3cb3..2bfd350ec51 100755 --- a/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh +++ b/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-parallel +# Tags: race set -e @@ -36,8 +36,8 @@ function g { export -f f; export -f g; -timeout 30 bash -c f > /dev/null & -timeout 30 bash -c g > /dev/null & +timeout 20 bash -c f > /dev/null & +timeout 20 bash -c g > /dev/null & wait $CLICKHOUSE_CLIENT -q "DROP TABLE mem" diff --git a/tests/queries/0_stateless/01475_read_subcolumns.sql b/tests/queries/0_stateless/01475_read_subcolumns.sql index 8d4e3cb779b..d6eec2f84a1 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns.sql +++ b/tests/queries/0_stateless/01475_read_subcolumns.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-settings +-- Tags: no-object-storage, no-random-settings SET use_uncompressed_cache = 0; diff --git a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh index 5a30f9e0f08..f74f6755e59 100755 --- a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh +++ b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage +# Tags: no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01487_distributed_in_not_default_db.sql b/tests/queries/0_stateless/01487_distributed_in_not_default_db.sql deleted file mode 100644 index cd027530ac8..00000000000 --- a/tests/queries/0_stateless/01487_distributed_in_not_default_db.sql +++ /dev/null @@ -1,43 +0,0 @@ --- Tags: distributed, no-parallel - -CREATE DATABASE IF NOT EXISTS shard_0; -CREATE DATABASE IF NOT EXISTS shard_1; -CREATE DATABASE IF NOT EXISTS main_01487; -CREATE DATABASE IF NOT EXISTS test_01487; - -USE main_01487; - -DROP TABLE IF EXISTS shard_0.l; -DROP TABLE IF EXISTS shard_1.l; -DROP TABLE IF EXISTS d; -DROP TABLE IF EXISTS t; - -CREATE TABLE shard_0.l (value UInt8) ENGINE = MergeTree ORDER BY value; -CREATE TABLE shard_1.l (value UInt8) ENGINE = MergeTree ORDER BY value; -CREATE TABLE t (value UInt8) ENGINE = Memory; - -INSERT INTO shard_0.l VALUES (0); -INSERT INTO shard_1.l VALUES (1); -INSERT INTO t VALUES (0), (1), (2); - -CREATE TABLE d AS t ENGINE = Distributed(test_cluster_two_shards_different_databases, currentDatabase(), t); - -USE test_01487; -DROP DATABASE test_01487; - --- After the default database is dropped QueryAnalysisPass cannot process the following SELECT query. --- That query is invalid on the initiator node. -set allow_experimental_analyzer = 0; - -SELECT * FROM main_01487.d WHERE value IN (SELECT l.value FROM l) ORDER BY value; - -USE main_01487; - -DROP TABLE IF EXISTS shard_0.l; -DROP TABLE IF EXISTS shard_1.l; -DROP TABLE IF EXISTS d; -DROP TABLE IF EXISTS t; - -DROP DATABASE shard_0; -DROP DATABASE shard_1; -DROP DATABASE main_01487; diff --git a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh index c3c87eeaf8b..6098c826e32 100755 --- a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh +++ b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-s3-storage +# Tags: race, zookeeper, no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql index 49ef9d8b79f..e53f4476ec6 100644 --- a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql +++ b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql @@ -1,4 +1,4 @@ --- Tags: long, replica, no-replicated-database, no-parallel, no-s3-storage +-- Tags: long, replica, no-replicated-database, no-parallel, no-object-storage -- Tag no-replicated-database: Fails due to additional replicas or shards -- Tag no-parallel: static zk path diff --git a/tests/queries/0_stateless/01533_multiple_nested.sql b/tests/queries/0_stateless/01533_multiple_nested.sql index 1a6f0ec395e..80e9fc7e2fb 100644 --- a/tests/queries/0_stateless/01533_multiple_nested.sql +++ b/tests/queries/0_stateless/01533_multiple_nested.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings -- no-s3 because read FileOpen metric DROP TABLE IF EXISTS nested; diff --git a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh index 697b32a77ae..a5697a62dc2 100755 --- a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh +++ b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT --multiquery --query " SET allow_suspicious_low_cardinality_types=1; CREATE TABLE IF NOT EXISTS test_01543 (value LowCardinality(String), value2 LowCardinality(UInt64)) ENGINE=Memory(); diff --git a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql index 95b46c69e83..b5ece08196e 100644 --- a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql +++ b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings DROP TABLE IF EXISTS data_01551; diff --git a/tests/queries/0_stateless/01563_distributed_query_finish.reference b/tests/queries/0_stateless/01563_distributed_query_finish.reference index c3688b553c4..b48979a492e 100644 --- a/tests/queries/0_stateless/01563_distributed_query_finish.reference +++ b/tests/queries/0_stateless/01563_distributed_query_finish.reference @@ -1,2 +1 @@ -1,0 NETWORK_ERROR=0 diff --git a/tests/queries/0_stateless/01563_distributed_query_finish.sh b/tests/queries/0_stateless/01563_distributed_query_finish.sh index 0019c714e40..e3c5928f108 100755 --- a/tests/queries/0_stateless/01563_distributed_query_finish.sh +++ b/tests/queries/0_stateless/01563_distributed_query_finish.sh @@ -19,20 +19,25 @@ create table dist_01247 as data_01247 engine=Distributed(test_cluster_two_shards select * from dist_01247 format Null; EOL -network_errors_before=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'") +# NOTE: it is possible to got NETWORK_ERROR even with no-parallel, at least due to system.*_log_sender to the cloud +for ((i = 0; i < 100; ++i)); do + network_errors_before=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'") -opts=( - "--max_distributed_connections=1" - "--optimize_skip_unused_shards=1" - "--optimize_distributed_group_by_sharding_key=1" - "--prefer_localhost_replica=0" -) -$CLICKHOUSE_CLIENT "${opts[@]}" --format CSV -nm < /dev/null & timeout $TIMEOUT bash -c kill_mutation_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.reference b/tests/queries/0_stateless/01600_parts_states_metrics_long.reference new file mode 100644 index 00000000000..98fb6a68656 --- /dev/null +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh new file mode 100755 index 00000000000..2e47034e528 --- /dev/null +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# NOTE: database = $CLICKHOUSE_DATABASE is unwanted +verify_sql="SELECT + (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics) + = (SELECT sum(active), sum(NOT active) FROM + (SELECT active FROM system.parts UNION ALL SELECT active FROM system.projection_parts UNION ALL SELECT 1 FROM system.dropped_tables_parts))" + +# The query is not atomic - it can compare states between system.parts and system.metrics from different points in time. +# So, there is inherent race condition. But it should get expected result eventually. +# In case of test failure, this code will do infinite loop and timeout. +verify() +{ + while true + do + result=$( $CLICKHOUSE_CLIENT -m --query="$verify_sql" ) + [ "$result" = "1" ] && break + sleep 0.1 + done + echo 1 +} + +$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE IF EXISTS test_table" +$CLICKHOUSE_CLIENT --query="CREATE TABLE test_table(data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;" + +$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-01')" +verify + +$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-02')" +verify + +$CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE test_table FINAL" +verify + +$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE test_table" +verify diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 187ff5c37e1..9b96ce3e586 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -1,6 +1,6 @@ --- Tags: no-random-merge-tree-settings, no-tsan, no-debug, no-s3-storage +-- Tags: no-random-merge-tree-settings, no-tsan, no-debug, no-object-storage -- no-tsan: too slow --- no-s3-storage: for remote tables we use thread pool even when reading with one stream, so memory consumption is higher +-- no-object-storage: for remote tables we use thread pool even when reading with one stream, so memory consumption is higher SET use_uncompressed_cache = 0; SET allow_prefetched_read_pool_for_remote_filesystem=0; diff --git a/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql b/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql index dfc761e1764..f7622bcf98f 100644 --- a/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql +++ b/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage drop table if exists data_01643; diff --git a/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql b/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql index 54c30fa2b1a..992cc687c88 100644 --- a/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql +++ b/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-s3-storage +-- Tags: no-parallel, no-object-storage -- no-parallel -- for flaky check and to avoid "Removing leftovers from table" (for other tables) -- Temporarily skip warning 'table was created by another server at the same moment, will retry' diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index a6af1f2170d..edf93b4b39f 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -163,6 +163,7 @@ Filter column: notEquals(__table1.y, 2_UInt8) > filter is pushed down before CreatingSets CreatingSets Filter +Filter 1 3 > one condition of filter is pushed down before LEFT JOIN diff --git a/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql index 1301135b4cb..2193fc7a8f4 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql +++ b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql @@ -1,3 +1,5 @@ +set query_plan_merge_filters=1; + set allow_experimental_analyzer=1; select explain from (explain actions = 1 select * from (select sum(number) as v, bitAnd(number, 15) as key from numbers(1e8) group by key having v != 0) where key = 7) where explain like '%Filter%' or explain like '%Aggregating%'; diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 96327536f89..685fe69642a 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -7,18 +7,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. - -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -CLICKHOUSE_USER_FILES_PATH=$(clickhouse-client --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/ -echo -n aaaaaaaaa > ${CLICKHOUSE_USER_FILES_PATH}/a.txt -echo -n bbbbbbbbb > ${CLICKHOUSE_USER_FILES_PATH}/b.txt -echo -n ccccccccc > ${CLICKHOUSE_USER_FILES_PATH}/c.txt +echo -n aaaaaaaaa > ${USER_FILES_PATH}/a.txt +echo -n bbbbbbbbb > ${USER_FILES_PATH}/b.txt +echo -n ccccccccc > ${USER_FILES_PATH}/c.txt echo -n ccccccccc > /tmp/c.txt -mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/dir +mkdir -p ${USER_FILES_PATH}/dir ### 1st TEST in CLIENT mode. @@ -85,15 +78,15 @@ echo "${CLICKHOUSE_LOCAL} --query "'"select file('"'dir'), file('b.txt')"'";echo # Test that the function is not injective -echo -n Hello > ${CLICKHOUSE_USER_FILES_PATH}/a -echo -n Hello > ${CLICKHOUSE_USER_FILES_PATH}/b -echo -n World > ${CLICKHOUSE_USER_FILES_PATH}/c +echo -n Hello > ${USER_FILES_PATH}/a +echo -n Hello > ${USER_FILES_PATH}/b +echo -n World > ${USER_FILES_PATH}/c ${CLICKHOUSE_CLIENT} --query "SELECT file(arrayJoin(['a', 'b', 'c'])) AS s, count() GROUP BY s ORDER BY s" ${CLICKHOUSE_CLIENT} --query "SELECT s, count() FROM file('?', TSV, 's String') GROUP BY s ORDER BY s" # Restore -rm ${CLICKHOUSE_USER_FILES_PATH}/{a,b,c}.txt -rm ${CLICKHOUSE_USER_FILES_PATH}/{a,b,c} +rm ${USER_FILES_PATH}/{a,b,c}.txt +rm ${USER_FILES_PATH}/{a,b,c} rm /tmp/c.txt -rm -rf ${CLICKHOUSE_USER_FILES_PATH}/dir +rm -rf ${USER_FILES_PATH}/dir diff --git a/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh b/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh index 9167a2d306f..0e5c2862066 100755 --- a/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh +++ b/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP DATABASE IF EXISTS 01684_database_for_cache_dictionary; CREATE DATABASE 01684_database_for_cache_dictionary; diff --git a/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh b/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh index 9dd8a41ce5a..5583a9dd5e7 100755 --- a/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh +++ b/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP DATABASE IF EXISTS 01685_database_for_cache_dictionary; CREATE DATABASE 01685_database_for_cache_dictionary; diff --git a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql index be0f7e8b710..921d28e6399 100644 --- a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql +++ b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql @@ -1,4 +1,4 @@ --- Tags: zookeeper, no-replicated-database, no-parallel, no-s3-storage +-- Tags: zookeeper, no-replicated-database, no-parallel, no-object-storage drop table if exists x; diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.sql b/tests/queries/0_stateless/01710_minmax_count_projection.sql index d0177da84d2..6c598bce440 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.sql +++ b/tests/queries/0_stateless/01710_minmax_count_projection.sql @@ -16,7 +16,7 @@ select min(i), max(i), count() from d where _partition_value.1 = 10 group by _pa select min(i) from d where 1 = _partition_value.1; -- fuzz crash https://github.com/ClickHouse/ClickHouse/issues/37151 -SELECT min(i), max(i), count() FROM d WHERE (_partition_value.1) = 0 GROUP BY ignore(bitTest(ignore(NULL), 65535), NULL, (_partition_value.1) = 7, '10.25', bitTest(NULL, -9223372036854775808), NULL, ignore(ignore(-2147483647, NULL)), 1024), _partition_id ORDER BY _partition_id ASC NULLS FIRST; +SELECT min(i), max(i), count() FROM d WHERE (_partition_value.1) = 0 GROUP BY ignore(bitTest(ignore(NULL), 0), NULL, (_partition_value.1) = 7, '10.25', bitTest(NULL, 0), NULL, ignore(ignore(-2147483647, NULL)), 1024), _partition_id ORDER BY _partition_id ASC NULLS FIRST; drop table d; diff --git a/tests/queries/0_stateless/01710_projection_vertical_merges.sql b/tests/queries/0_stateless/01710_projection_vertical_merges.sql index 2c4378bb7a4..0f80d659e92 100644 --- a/tests/queries/0_stateless/01710_projection_vertical_merges.sql +++ b/tests/queries/0_stateless/01710_projection_vertical_merges.sql @@ -1,4 +1,4 @@ --- Tags: long, no-parallel +-- Tags: long drop table if exists t; diff --git a/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql b/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql index 050aa33464e..c7c525ba20e 100644 --- a/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql +++ b/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql @@ -25,7 +25,7 @@ create table dictst01747(some_name String, field1 String, field2 UInt8) Engine = as select 'name', 'test', 33; CREATE DICTIONARY default.dict01747 (some_name String, field1 String, field2 UInt8) -PRIMARY KEY some_name SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 +PRIMARY KEY some_name SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE dictst01747 DB currentDatabase() USER 'default')) LIFETIME(MIN 0 MAX 0) LAYOUT(COMPLEX_KEY_HASHED()); diff --git a/tests/queries/0_stateless/01747_system_session_log_long.sh b/tests/queries/0_stateless/01747_system_session_log_long.sh index ecddcb627b8..022bf488886 100755 --- a/tests/queries/0_stateless/01747_system_session_log_long.sh +++ b/tests/queries/0_stateless/01747_system_session_log_long.sh @@ -228,13 +228,13 @@ function testMySQL() echo "MySQL 'successful login' case is skipped for ${auth_type}." else executeQuery \ - <<< "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', '${username}', '${password}') LIMIT 1 \ + <<< "SELECT 1 FROM mysql('127.0.0.1:${CLICKHOUSE_PORT_MYSQL}', 'system', 'one', '${username}', '${password}') LIMIT 1 \ FORMAT Null" fi echo 'Wrong username' executeQueryExpectError \ - <<< "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', 'invalid_${username}', '${password}') LIMIT 1 \ + <<< "SELECT 1 FROM mysql('127.0.0.1:${CLICKHOUSE_PORT_MYSQL}', 'system', 'one', 'invalid_${username}', '${password}') LIMIT 1 \ FORMAT Null" \ | grep -Eq "Code: 279\. DB::Exception: .* invalid_${username}" @@ -246,7 +246,7 @@ function testMySQL() echo "MySQL 'wrong password' case is skipped for ${auth_type}." else executeQueryExpectError \ - <<< "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', '${username}', 'invalid_${password}') LIMIT 1 \ + <<< "SELECT 1 FROM mysql('127.0.0.1:${CLICKHOUSE_PORT_MYSQL}', 'system', 'one', '${username}', 'invalid_${password}') LIMIT 1 \ FORMAT Null" | grep -Eq "Code: 279\. DB::Exception: .* ${username}" fi } @@ -267,11 +267,11 @@ function testMySQL() ## Loging\Logout ## CH is being able to log into itself via PostgreSQL protocol but query fails. #executeQueryExpectError \ - # <<< "SELECT 1 FROM postgresql('localhost:9005', 'system', 'one', '${username}', '${password}') LIMIT 1 FORMAT Null" \ + # <<< "SELECT 1 FROM postgresql('localhost:${CLICKHOUSE_PORT_POSTGRESQL', 'system', 'one', '${username}', '${password}') LIMIT 1 FORMAT Null" \ # Wrong username executeQueryExpectError \ - <<< "SELECT 1 FROM postgresql('localhost:9005', 'system', 'one', 'invalid_${username}', '${password}') LIMIT 1 FORMAT Null" \ + <<< "SELECT 1 FROM postgresql('localhost:${CLICKHOUSE_PORT_POSTGRESQL}', 'system', 'one', 'invalid_${username}', '${password}') LIMIT 1 FORMAT Null" \ | grep -Eq "Invalid user or password" if [[ "${auth_type}" == "no_password" ]] @@ -281,7 +281,7 @@ function testMySQL() else # Wrong password executeQueryExpectError \ - <<< "SELECT 1 FROM postgresql('localhost:9005', 'system', 'one', '${username}', 'invalid_${password}') LIMIT 1 FORMAT Null" \ + <<< "SELECT 1 FROM postgresql('localhost:${CLICKHOUSE_PORT_POSTGRESQL}', 'system', 'one', '${username}', 'invalid_${password}') LIMIT 1 FORMAT Null" \ | grep -Eq "Invalid user or password" fi } diff --git a/tests/queries/0_stateless/01748_dictionary_table_dot.sql b/tests/queries/0_stateless/01748_dictionary_table_dot.sql index a2364fdf823..993d2e1a635 100644 --- a/tests/queries/0_stateless/01748_dictionary_table_dot.sql +++ b/tests/queries/0_stateless/01748_dictionary_table_dot.sql @@ -22,7 +22,7 @@ CREATE DICTIONARY test_dict `value` String ) PRIMARY KEY key1, key2 -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE `test.txt` PASSWORD '' DB currentDatabase())) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE `test.txt` PASSWORD '' DB currentDatabase())) LIFETIME(MIN 1 MAX 3600) LAYOUT(COMPLEX_KEY_HASHED()); diff --git a/tests/queries/0_stateless/01748_partition_id_pruning.sql b/tests/queries/0_stateless/01748_partition_id_pruning.sql index b637528bc6c..f492a4a8871 100644 --- a/tests/queries/0_stateless/01748_partition_id_pruning.sql +++ b/tests/queries/0_stateless/01748_partition_id_pruning.sql @@ -8,17 +8,17 @@ insert into x values (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); set max_rows_to_read = 3; -select * from x where _partition_id = partitionId(1); +select * from x where _partition_id = partitionID(1); set max_rows_to_read = 5; -- one row for subquery + subquery -select * from x where _partition_id in (select partitionId(number + 1) from numbers(1)); +select * from x where _partition_id in (select partitionID(number + 1) from numbers(1)); -- trivial count optimization test set max_rows_to_read = 2; -- one row for subquery + subquery itself -- TODO: Relax the limits because we might build prepared set twice with _minmax_count_projection set max_rows_to_read = 3; -select count() from x where _partition_id in (select partitionId(number + 1) from numbers(1)); +select count() from x where _partition_id in (select partitionID(number + 1) from numbers(1)); drop table x; diff --git a/tests/queries/0_stateless/01780_clickhouse_dictionary_source_loop.sql b/tests/queries/0_stateless/01780_clickhouse_dictionary_source_loop.sql index 1eee4090112..3ebc85c47f7 100644 --- a/tests/queries/0_stateless/01780_clickhouse_dictionary_source_loop.sql +++ b/tests/queries/0_stateless/01780_clickhouse_dictionary_source_loop.sql @@ -10,7 +10,7 @@ CREATE DICTIONARY dict1 value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 TABLE 'dict1')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'dict1')) LAYOUT(DIRECT()); SELECT * FROM dict1; --{serverError BAD_ARGUMENTS} @@ -24,7 +24,7 @@ CREATE DICTIONARY 01780_db.dict2 value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 DATABASE '01780_db' TABLE 'dict2')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() DATABASE '01780_db' TABLE 'dict2')) LAYOUT(DIRECT()); SELECT * FROM 01780_db.dict2; --{serverError BAD_ARGUMENTS} @@ -45,7 +45,7 @@ CREATE DICTIONARY 01780_db.dict3 value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 TABLE 'dict3_source' DATABASE '01780_db')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'dict3_source' DATABASE '01780_db')) LAYOUT(DIRECT()); SELECT * FROM 01780_db.dict3; diff --git a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh index 3782a7d3ad6..c38fc505fa8 100755 --- a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh +++ b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage +# Tags: long, no-object-storage # Because parallel parts removal disabled for s3 storage # NOTE: this done as not .sql since we need to Ordinary database diff --git a/tests/queries/0_stateless/01825_type_json_btc.sh b/tests/queries/0_stateless/01825_type_json_btc.sh index 1e74166e7a7..ebc5482de7a 100755 --- a/tests/queries/0_stateless/01825_type_json_btc.sh +++ b/tests/queries/0_stateless/01825_type_json_btc.sh @@ -5,10 +5,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* -cp $CUR_DIR/data_json/btc_transactions.json ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +mkdir -p ${CLICKHOUSE_USER_FILES_UNIQUE}/ +rm -rf "${CLICKHOUSE_USER_FILES_UNIQUE:?}"/* +cp $CUR_DIR/data_json/btc_transactions.json ${CLICKHOUSE_USER_FILES_UNIQUE}/ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS btc" @@ -27,4 +26,4 @@ ${CLICKHOUSE_CLIENT} -q "SELECT data.out.spending_outpoints AS outpoints FROM bt ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS btc" -rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/btc_transactions.json +rm ${CLICKHOUSE_USER_FILES_UNIQUE}/btc_transactions.json diff --git a/tests/queries/0_stateless/01825_type_json_multiple_files.sh b/tests/queries/0_stateless/01825_type_json_multiple_files.sh index 089b7991784..453e7a3c78e 100755 --- a/tests/queries/0_stateless/01825_type_json_multiple_files.sh +++ b/tests/queries/0_stateless/01825_type_json_multiple_files.sh @@ -1,23 +1,22 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') -for f in "$user_files_path"/01825_file_*.json; do +for f in "${USER_FILES_PATH:?}/${CLICKHOUSE_DATABASE}"_*.json; do [ -e $f ] && rm $f done for i in {0..5}; do - echo "{\"k$i\": 100}" > "$user_files_path"/01825_file_$i.json + echo "{\"k$i\": 100}" > "$USER_FILES_PATH/${CLICKHOUSE_DATABASE}_$i.json" done ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_files" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_files (file String, data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 -${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON')" --allow_experimental_object_type 1 +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files SELECT _file, data FROM file('${CLICKHOUSE_DATABASE}_*.json', 'JSONAsObject', 'data JSON')" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" @@ -25,7 +24,7 @@ ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" ${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE IF EXISTS t_json_files" ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files \ - SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON') \ + SELECT _file, data FROM file('${CLICKHOUSE_DATABASE}_*.json', 'JSONAsObject', 'data JSON') \ ORDER BY _file LIMIT 3" --max_threads 1 --min_insert_block_size_rows 1 --max_insert_block_size 1 --max_block_size 1 --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file, data FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 @@ -34,11 +33,11 @@ ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" ${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE IF EXISTS t_json_files" ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files \ - SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON') \ - WHERE _file IN ('01825_file_1.json', '01825_file_3.json')" --allow_experimental_object_type 1 + SELECT _file, data FROM file('${CLICKHOUSE_DATABASE}_*.json', 'JSONAsObject', 'data JSON') \ + WHERE _file IN ('${CLICKHOUSE_DATABASE}_1.json', '${CLICKHOUSE_DATABASE}_3.json')" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_files" -rm "$user_files_path"/01825_file_*.json +rm "$USER_FILES_PATH"/${CLICKHOUSE_DATABASE}_*.json diff --git a/tests/queries/0_stateless/01825_type_json_schema_inference.sh b/tests/queries/0_stateless/01825_type_json_schema_inference.sh index 5fca608d8bb..e0c283b2230 100755 --- a/tests/queries/0_stateless/01825_type_json_schema_inference.sh +++ b/tests/queries/0_stateless/01825_type_json_schema_inference.sh @@ -10,11 +10,10 @@ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_inference" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_inference (id UInt64, obj Object(Nullable('json')), s String) \ ENGINE = MergeTree ORDER BY id" --allow_experimental_object_type 1 -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* -filename="${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json" +filename="${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json" echo '{"id": 1, "obj": {"k1": 1, "k2": {"k3": 2, "k4": [{"k5": 3}, {"k5": 4}]}}, "s": "foo"}' > $filename echo '{"id": 2, "obj": {"k2": {"k3": "str", "k4": [{"k6": 55}]}, "some": 42}, "s": "bar"}' >> $filename diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns.reference deleted file mode 100644 index a1cd31e2dc9..00000000000 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference +++ /dev/null @@ -1,47 +0,0 @@ -0 0 1 -0 1 0 -SELECT - id IS NULL, - `n.null`, - NOT `n.null` -FROM t_func_to_subcolumns -3 0 1 0 -0 1 0 \N -SELECT - `arr.size0`, - `arr.size0` = 0, - `arr.size0` != 0, - empty(n) -FROM t_func_to_subcolumns -['foo','bar'] [1,2] -[] [] -SELECT - `m.keys`, - `m.values` -FROM t_func_to_subcolumns -1 -SELECT sum(NOT `n.null`) -FROM t_func_to_subcolumns -2 -SELECT count(id) -FROM t_func_to_subcolumns -1 0 0 -2 1 0 -3 0 0 -SELECT - id, - `n.null`, - right.n IS NULL -FROM t_func_to_subcolumns AS left -ALL FULL OUTER JOIN -( - SELECT - 1 AS id, - \'qqq\' AS n - UNION ALL - SELECT - 3 AS id, - \'www\' -) AS right USING (id) -0 10 -0 20 diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference new file mode 100644 index 00000000000..32bacfba5ea --- /dev/null +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference @@ -0,0 +1,181 @@ +0 0 1 +0 1 0 +QUERY id: 0 + PROJECTION COLUMNS + isNull(id) UInt8 + isNull(n) UInt8 + isNotNull(n) UInt8 + PROJECTION + LIST id: 1, nodes: 3 + CONSTANT id: 2, constant_value: UInt64_0, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 3, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 4, nodes: 1 + COLUMN id: 5, column_name: id, result_type: UInt64, source_id: 6 + COLUMN id: 7, column_name: n.null, result_type: UInt8, source_id: 6 + FUNCTION id: 8, function_name: not, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 1 + COLUMN id: 10, column_name: n.null, result_type: UInt8, source_id: 6 + JOIN TREE + TABLE id: 6, alias: __table1, table_name: default.t_func_to_subcolumns + +SELECT + _CAST(0, \'UInt8\') AS `isNull(id)`, + __table1.`n.null` AS `isNull(n)`, + NOT __table1.`n.null` AS `isNotNull(n)` +FROM default.t_func_to_subcolumns AS __table1 +3 0 1 0 +0 1 0 \N +QUERY id: 0 + PROJECTION COLUMNS + length(arr) UInt64 + empty(arr) UInt8 + notEmpty(arr) UInt8 + empty(n) Nullable(UInt8) + PROJECTION + LIST id: 1, nodes: 4 + COLUMN id: 2, column_name: arr.size0, result_type: UInt64, source_id: 3 + FUNCTION id: 4, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: arr.size0, result_type: UInt64, source_id: 3 + CONSTANT id: 7, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 8, function_name: notEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: arr.size0, result_type: UInt64, source_id: 3 + CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 12, function_name: empty, function_type: ordinary, result_type: Nullable(UInt8) + ARGUMENTS + LIST id: 13, nodes: 1 + COLUMN id: 14, column_name: n, result_type: Nullable(String), source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + +SELECT + __table1.`arr.size0` AS `length(arr)`, + __table1.`arr.size0` = 0 AS `empty(arr)`, + __table1.`arr.size0` != 0 AS `notEmpty(arr)`, + empty(__table1.n) AS `empty(n)` +FROM default.t_func_to_subcolumns AS __table1 +['foo','bar'] [1,2] +[] [] +QUERY id: 0 + PROJECTION COLUMNS + mapKeys(m) Array(String) + mapValues(m) Array(UInt64) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: m.keys, result_type: Array(String), source_id: 3 + COLUMN id: 4, column_name: m.values, result_type: Array(UInt64), source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + +SELECT + __table1.`m.keys` AS `mapKeys(m)`, + __table1.`m.values` AS `mapValues(m)` +FROM default.t_func_to_subcolumns AS __table1 +1 +QUERY id: 0 + PROJECTION COLUMNS + count(n) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: not, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: n.null, result_type: UInt8, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.t_func_to_subcolumns + +SELECT sum(NOT __table1.`n.null`) AS `count(n)` +FROM default.t_func_to_subcolumns AS __table1 +2 +QUERY id: 0 + PROJECTION COLUMNS + count(id) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 + JOIN TREE + TABLE id: 5, alias: __table1, table_name: default.t_func_to_subcolumns + +SELECT count(__table1.id) AS `count(id)` +FROM default.t_func_to_subcolumns AS __table1 +1 0 0 +2 1 0 +3 0 0 +QUERY id: 0 + PROJECTION COLUMNS + id UInt64 + isNull(n) UInt8 + isNull(right.n) UInt8 + PROJECTION + LIST id: 1, nodes: 3 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: n.null, result_type: UInt8, source_id: 3 + CONSTANT id: 5, constant_value: UInt64_0, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 6, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: n, result_type: String, source_id: 9 + JOIN TREE + JOIN id: 10, strictness: ALL, kind: FULL + LEFT TABLE EXPRESSION + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + RIGHT TABLE EXPRESSION + UNION id: 9, alias: __table2, is_subquery: 1, union_mode: UNION_ALL + QUERIES + LIST id: 11, nodes: 2 + QUERY id: 12, alias: __table3 + PROJECTION COLUMNS + id UInt8 + PROJECTION + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8 + JOIN TREE + TABLE id: 15, alias: __table4, table_name: system.one + QUERY id: 16, alias: __table5 + PROJECTION COLUMNS + id UInt8 + PROJECTION + LIST id: 17, nodes: 1 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + JOIN TREE + TABLE id: 19, alias: __table6, table_name: system.one + JOIN EXPRESSION + LIST id: 20, nodes: 1 + COLUMN id: 21, column_name: id, result_type: UInt64, source_id: 10 + EXPRESSION + LIST id: 22, nodes: 2 + COLUMN id: 23, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 24, column_name: id, result_type: UInt8, source_id: 9 + +SELECT + __table1.id AS id, + __table1.`n.null` AS `isNull(n)`, + _CAST(0, \'UInt8\') AS `isNull(right.n)` +FROM default.t_func_to_subcolumns AS __table1 +ALL FULL OUTER JOIN +( + ( + SELECT 1 AS id + FROM system.one AS __table4 + ) + UNION ALL + ( + SELECT 3 AS id + FROM system.one AS __table6 + ) +) AS __table2 USING (id) +0 10 +0 20 diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql similarity index 62% rename from tests/queries/0_stateless/01872_functions_to_subcolumns.sql rename to tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql index 45f83bf20e5..b544f6829cf 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql @@ -1,5 +1,6 @@ DROP TABLE IF EXISTS t_func_to_subcolumns; +SET allow_experimental_analyzer = 1; SET optimize_functions_to_subcolumns = 1; CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) @@ -8,24 +9,24 @@ ENGINE = MergeTree ORDER BY tuple(); INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; SELECT count(n) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT count(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT count(n) FROM t_func_to_subcolumns; SELECT count(id) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT count(id) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT count(id) FROM t_func_to_subcolumns; SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); -EXPLAIN SYNTAX SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); DROP TABLE t_func_to_subcolumns; diff --git a/tests/queries/0_stateless/01875_ssd_cache_dictionary_decimal256_type.sh b/tests/queries/0_stateless/01875_ssd_cache_dictionary_decimal256_type.sh index 36a2165329b..8336229a643 100755 --- a/tests/queries/0_stateless/01875_ssd_cache_dictionary_decimal256_type.sh +++ b/tests/queries/0_stateless/01875_ssd_cache_dictionary_decimal256_type.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" SET allow_experimental_bigint_types = 1; diff --git a/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 b/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 index bd20d34b684..c2d85cefb18 100644 --- a/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 +++ b/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 @@ -72,7 +72,7 @@ SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.id + 2; -- { serverE SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.id + 2; -- { serverError 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.key; -- { serverError 43, 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverError 43, 403 } -SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 } +SELECT * FROM t1 JOIN t2_nullable as t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 OR t1.id == t2.id; -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON (t2.key == t2.key2 AND (t1.key == t1.key2 AND t1.key != 'XXX' OR t1.id == t2.id)) AND t1.id == t2.id; -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key != 'XXX' AND t1.id == t2.id OR t2.key == t2.key2 AND t1.id == t2.id AND t1.id == t2.id; diff --git a/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 b/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 index e4b704247b2..13703771ac8 100644 --- a/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 +++ b/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 @@ -70,7 +70,7 @@ SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.id + 2; -- { serverE SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.id + 2; -- { serverError 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.key; -- { serverError 43, 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverError 43, 403 } -SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 } +SELECT * FROM t1 JOIN t2_nullable as t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 OR t1.id == t2.id; -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON (t2.key == t2.key2 AND (t1.key == t1.key2 AND t1.key != 'XXX' OR t1.id == t2.id)) AND t1.id == t2.id; -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key != 'XXX' AND t1.id == t2.id OR t2.key == t2.key2 AND t1.id == t2.id AND t1.id == t2.id; -- { serverError 48 } diff --git a/tests/queries/0_stateless/01889_check_row_policy_defined_using_user_function.sh b/tests/queries/0_stateless/01889_check_row_policy_defined_using_user_function.sh index b5be39a91df..f79637a7635 100755 --- a/tests/queries/0_stateless/01889_check_row_policy_defined_using_user_function.sh +++ b/tests/queries/0_stateless/01889_check_row_policy_defined_using_user_function.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none @@ -7,23 +6,27 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -q "drop user if exists u_01889" -${CLICKHOUSE_CLIENT} -q "drop role if exists r_01889" -${CLICKHOUSE_CLIENT} -q "drop policy if exists t_01889_filter on t_01889" -${CLICKHOUSE_CLIENT} -q "create user u_01889 identified with plaintext_password by 'dfsdffdf5t123'" -${CLICKHOUSE_CLIENT} -q "revoke all on *.* from u_01889" -${CLICKHOUSE_CLIENT} -q "create role r_01889" +USER=u_01889$RANDOM +ROLE=r_01889$RANDOM +POLICY=t_01889_filter$RANDOM + +${CLICKHOUSE_CLIENT} -q "drop user if exists $USER" +${CLICKHOUSE_CLIENT} -q "drop role if exists ${ROLE}" +${CLICKHOUSE_CLIENT} -q "drop policy if exists ${POLICY} on t_01889" +${CLICKHOUSE_CLIENT} -q "create user $USER identified with plaintext_password by 'dfsdffdf5t123'" +${CLICKHOUSE_CLIENT} -q "revoke all on *.* from $USER" +${CLICKHOUSE_CLIENT} -q "create role ${ROLE}" ${CLICKHOUSE_CLIENT} -q "create table t_01889(a Int64, user_id String) Engine=MergeTree order by a" -${CLICKHOUSE_CLIENT} -q "insert into t_01889 select number, 'u_01889' from numbers(1000)" +${CLICKHOUSE_CLIENT} -q "insert into t_01889 select number, '$USER' from numbers(1000)" ${CLICKHOUSE_CLIENT} -q "insert into t_01889 select number, 'xxxxxxx' from numbers(1000)" -${CLICKHOUSE_CLIENT} -q "grant select on t_01889 to r_01889" -${CLICKHOUSE_CLIENT} -q "create row policy t_01889_filter ON t_01889 FOR SELECT USING user_id = user() TO r_01889" -${CLICKHOUSE_CLIENT} -q "grant r_01889 to u_01889" -${CLICKHOUSE_CLIENT} -q "alter user u_01889 default role r_01889 settings none" +${CLICKHOUSE_CLIENT} -q "grant select on t_01889 to ${ROLE}" +${CLICKHOUSE_CLIENT} -q "create row policy ${POLICY} ON t_01889 FOR SELECT USING user_id = user() TO ${ROLE}" +${CLICKHOUSE_CLIENT} -q "grant ${ROLE} to $USER" +${CLICKHOUSE_CLIENT} -q "alter user $USER default role ${ROLE} settings none" -${CLICKHOUSE_CLIENT_BINARY} --database=${CLICKHOUSE_DATABASE} --user=u_01889 --password=dfsdffdf5t123 --query="select count() from t_01889" +${CLICKHOUSE_CLIENT_BINARY} --database=${CLICKHOUSE_DATABASE} --user=$USER --password=dfsdffdf5t123 --query="select count() from t_01889" -${CLICKHOUSE_CLIENT} -q "drop user u_01889" -${CLICKHOUSE_CLIENT} -q "drop policy t_01889_filter on t_01889" -${CLICKHOUSE_CLIENT} -q "drop role r_01889" +${CLICKHOUSE_CLIENT} -q "drop user $USER" +${CLICKHOUSE_CLIENT} -q "drop policy ${POLICY} on t_01889" +${CLICKHOUSE_CLIENT} -q "drop role ${ROLE}" ${CLICKHOUSE_CLIENT} -q "drop table t_01889" diff --git a/tests/queries/0_stateless/01889_sqlite_read_write.sh b/tests/queries/0_stateless/01889_sqlite_read_write.sh index 30496af46f6..63ce5dc909c 100755 --- a/tests/queries/0_stateless/01889_sqlite_read_write.sh +++ b/tests/queries/0_stateless/01889_sqlite_read_write.sh @@ -6,15 +6,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# See 01658_read_file_to_string_column.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p "${user_files_path}/" -chmod 777 "${user_files_path}" - export CURR_DATABASE="test_01889_sqllite_${CLICKHOUSE_DATABASE}" -DB_PATH=${user_files_path}/${CURR_DATABASE}_db1 +DB_PATH=${USER_FILES_PATH}/${CURR_DATABASE}_db1 DB_PATH2=$CUR_DIR/${CURR_DATABASE}_db2 function cleanup() diff --git a/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh b/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh index 3676f1429b2..853445daf3f 100755 --- a/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh +++ b/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP TABLE IF EXISTS dictionary_array_source_table; CREATE TABLE dictionary_array_source_table diff --git a/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh b/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh index 6aecb20329a..0b555cf82c2 100755 --- a/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh +++ b/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP TABLE IF EXISTS dictionary_nullable_source_table; CREATE TABLE dictionary_nullable_source_table diff --git a/tests/queries/0_stateless/01910_view_dictionary.sql b/tests/queries/0_stateless/01910_view_dictionary.sql index 05a67889825..51f46decadd 100644 --- a/tests/queries/0_stateless/01910_view_dictionary.sql +++ b/tests/queries/0_stateless/01910_view_dictionary.sql @@ -34,7 +34,7 @@ CREATE DICTIONARY flat_dictionary value_ru String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' PASSWORD '' TABLE 'dictionary_source_view')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' PASSWORD '' TABLE 'dictionary_source_view')) LIFETIME(MIN 1 MAX 1000) LAYOUT(FLAT()); diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh index 5e1600a0673..edffc0a3807 100755 --- a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel +# Tags: long, zookeeper CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=error @@ -50,7 +50,7 @@ function insert_thread export -f insert_thread; export -f optimize_thread; -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c insert_thread 2> /dev/null & timeout $TIMEOUT bash -c insert_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh index 4b230e4f738..0aedef028a2 100755 --- a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh +++ b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh @@ -6,10 +6,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh -# See 01658_read_file_to_string_column.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir -p ${user_files_path}/ -cp $CUR_DIR/data_zstd/test_01946.zstd ${user_files_path}/ +mkdir -p ${USER_FILES_PATH}/ +cp $CUR_DIR/data_zstd/test_01946.zstd ${USER_FILES_PATH}/ ${CLICKHOUSE_CLIENT} --multiline --multiquery --query " set min_chunk_bytes_for_parallel_parsing=10485760; diff --git a/tests/queries/0_stateless/02003_compress_bz2.sh b/tests/queries/0_stateless/02003_compress_bz2.sh index b17effb20b6..edc433ad69b 100755 --- a/tests/queries/0_stateless/02003_compress_bz2.sh +++ b/tests/queries/0_stateless/02003_compress_bz2.sh @@ -6,7 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') WORKING_FOLDER_02003="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}" rm -rf "${WORKING_FOLDER_02003}" diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.reference b/tests/queries/0_stateless/02010_array_index_bad_cast.reference index e69de29bb2d..e22493782f0 100644 --- a/tests/queries/0_stateless/02010_array_index_bad_cast.reference +++ b/tests/queries/0_stateless/02010_array_index_bad_cast.reference @@ -0,0 +1,3 @@ +1 +0 +0 diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.sql b/tests/queries/0_stateless/02010_array_index_bad_cast.sql index 14162e0d2e2..590e60eb42e 100644 --- a/tests/queries/0_stateless/02010_array_index_bad_cast.sql +++ b/tests/queries/0_stateless/02010_array_index_bad_cast.sql @@ -1,3 +1,4 @@ --- This query throws exception about uncomparable data types (but at least it does not introduce bad cast in code). SET allow_suspicious_low_cardinality_types=1; -SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14'::DateTime64(7))); -- { serverError ILLEGAL_COLUMN } +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14'::DateTime64(7))); +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14 00:00:01'::DateTime64(7))); +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize(NULL)); diff --git a/tests/queries/0_stateless/02012_compress_lz4.sh b/tests/queries/0_stateless/02012_compress_lz4.sh index aad437c8011..700bff613da 100755 --- a/tests/queries/0_stateless/02012_compress_lz4.sh +++ b/tests/queries/0_stateless/02012_compress_lz4.sh @@ -4,7 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') WORKING_FOLDER_02012="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}" rm -rf "${WORKING_FOLDER_02012}" diff --git a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference index e6a2b2b6aaf..ab832478da0 100644 --- a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference +++ b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference @@ -41,8 +41,6 @@ String ConstConst 38 Hello 00000001 39 Hello 00000000 40 Hello -41 Hello -42 Hello FixedString ConstConst 1 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 @@ -92,10 +90,8 @@ FixedString ConstConst 78 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000001 79 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 80 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -81 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 String VectorVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 0010010000110010101101100011011000110111 7 Hello 0000000010010000110010101101100011011000 @@ -112,8 +108,6 @@ String VectorVector 33 Hello 00100100 39 Hello 00000000 40 Hello -41 Hello -42 Hello 7 Hel 000000001001000011001010 8 Hel 0100100001100101 9 Hel 0010010000110010 @@ -125,7 +119,6 @@ String VectorVector 9 Hel 0010010000110010 FixedString VectorVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 00100100001100101011011000110110001101111000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 @@ -142,8 +135,6 @@ FixedString VectorVector 33 Hello\0\0\0\0\0 00000000000000000000000000000000001001000011001010110110001101100011011110000000 39 Hello\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110001101100011011110 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000110110001101111 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100011011000110111 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110001101100011011 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 00000000001001000011001010110110000000000000000000000000000000000000000000000000 @@ -171,9 +162,6 @@ String VectorConst 7 Hello 0000000010010000110010101101100011011000 7 Hello 0000000010010000110010101101100011011000 7 Hello 0000000010010000110010101101100011011000 -7 Hello 0000000010010000110010101101100011011000 -7 Hello 0000000010010000110010101101100011011000 -7 Hello 0000000010010000110010101101100011011000 7 Hel 000000001001000011001010 7 Hel 000000001001000011001010 7 Hel 000000001001000011001010 @@ -193,9 +181,6 @@ String VectorConst 8 Hello 01001000011001010110110001101100 8 Hello 01001000011001010110110001101100 8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 8 Hel 0100100001100101 8 Hel 0100100001100101 8 Hel 0100100001100101 @@ -217,9 +202,6 @@ FixedString VectorConst 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 @@ -239,15 +221,11 @@ FixedString VectorConst 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 String ConstVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 0010010000110010101101100011011000110111 7 Hello 0000000010010000110010101101100011011000 @@ -264,12 +242,9 @@ String ConstVector 33 Hello 00100100 39 Hello 00000000 40 Hello -41 Hello -42 Hello 7 Hello 0000000010010000110010101101100011011000 8 Hello 01001000011001010110110001101100 9 Hello 00100100001100101011011000110110 --1 Hel 0 Hel 010010000110010101101100 1 Hel 001001000011001010110110 7 Hel 000000001001000011001010 @@ -280,20 +255,11 @@ String ConstVector 17 Hel 00100100 23 Hel 00000000 24 Hel -25 Hel -31 Hel -32 Hel -33 Hel -39 Hel -40 Hel -41 Hel -42 Hel 7 Hel 000000001001000011001010 8 Hel 0100100001100101 9 Hel 0010010000110010 FixedString ConstVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 00100100001100101011011000110110001101111000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 @@ -310,12 +276,9 @@ FixedString ConstVector 33 Hello\0\0\0\0\0 00000000000000000000000000000000001001000011001010110110001101100011011110000000 39 Hello\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110001101100011011110 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000110110001101111 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100011011000110111 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110001101100011011 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 9 Hello\0\0\0\0\0 00000000001001000011001010110110001101100011011110000000000000000000000000000000 --1 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hel\0\0\0\0\0\0\0 01001000011001010110110000000000000000000000000000000000000000000000000000000000 1 Hel\0\0\0\0\0\0\0 00100100001100101011011000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 @@ -332,8 +295,6 @@ FixedString ConstVector 33 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000001001000011001010110110000000000000000000000000 39 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110000000000000000000 40 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000000000000000000 -41 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100000000000000000 -42 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 00000000001001000011001010110110000000000000000000000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql index 0ee04e408ba..40fccbc89e6 100644 --- a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql +++ b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql @@ -41,8 +41,6 @@ SELECT 37,'Hello',bin(bitShiftRight('Hello', 37)); SELECT 38,'Hello',bin(bitShiftRight('Hello', 38)); SELECT 39,'Hello',bin(bitShiftRight('Hello', 39)); SELECT 40,'Hello',bin(bitShiftRight('Hello', 40)); -SELECT 41,'Hello',bin(bitShiftRight('Hello', 41)); -SELECT 42,'Hello',bin(bitShiftRight('Hello', 42)); SELECT 'FixedString ConstConst'; SELECT bin(toFixedString('Hello', 10)) == bin(bitShiftRight(toFixedString('Hello', 10), 0)); @@ -93,40 +91,39 @@ SELECT 77,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 1 SELECT 78,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 78)); SELECT 79,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 79)); SELECT 80,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 80)); -SELECT 81,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 81)); DROP TABLE IF EXISTS test_bit_shift_right_string_integer; CREATE TABLE test_bit_shift_right_string_integer (str String, fixedStr FixedString(10), id Int64) engine=Log; -INSERT INTO test_bit_shift_right_string_integer VALUES('Hello','Hello',-1)('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hello','Hello',41),('Hello','Hello',42),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); +INSERT INTO test_bit_shift_right_string_integer VALUES('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String VectorVector'; SELECT id as shift_right_bit,str as arg,bin(bitShiftRight(str, id)) as string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,str as arg,bin(bitShiftRight(str, id)) as string_res FROM test_bit_shift_right_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftRight('Hello', 42)); +SELECT bin(bitShiftRight('Hello', 40)); SELECT 'FixedString VectorVector'; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, id)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, id)) as fixed_string_res FROM test_bit_shift_right_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String VectorConst'; SELECT 7 as shift_right_bit,str as arg,bin(bitShiftRight(str, 7)) as string_res FROM test_bit_shift_right_string_integer; SELECT 8 as shift_right_bit,str as arg,bin(bitShiftRight(str, 8)) as string_res FROM test_bit_shift_right_string_integer; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'FixedString VectorConst'; SELECT 7 as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, 7)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT 8 as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, 8)) as fixed_string_res FROM test_bit_shift_right_string_integer; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String ConstVector'; SELECT id as shift_right_bit,'Hello' as arg,bin(bitShiftRight('Hello', id)) as string_res FROM test_bit_shift_right_string_integer; -SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftRight('Hel', id)) as string_res FROM test_bit_shift_right_string_integer; +SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftRight('Hel', id)) as string_res FROM test_bit_shift_right_string_integer WHERE id <= 8 * 3; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'FixedString ConstVector'; SELECT id as shift_right_bit,toFixedString('Hello', 10) as arg,bin(bitShiftRight(toFixedString('Hello', 10), id)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,toFixedString('Hel', 10) as arg,bin(bitShiftRight(toFixedString('Hel', 10), id)) as fixed_string_res FROM test_bit_shift_right_string_integer; diff --git a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference index ff5a09c0d48..a20c44bbb9a 100644 --- a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference +++ b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference @@ -41,8 +41,6 @@ String ConstConst 38 Hello 00010010000110010101101100011011000110111100000000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello FixedString ConstConst 1 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 @@ -92,10 +90,8 @@ FixedString ConstConst 78 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 79 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 80 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -81 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 String VectorVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 000000001001000011001010110110001101100011011110 7 Hello 001001000011001010110110001101100011011110000000 @@ -112,8 +108,6 @@ String VectorVector 33 Hello 00000000100100001100101011011000110110001101111000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello 7 Hel 00100100001100101011011000000000 8 Hel 01001000011001010110110000000000 9 Hel 0000000010010000110010101101100000000000 @@ -125,7 +119,6 @@ String VectorVector 9 Hel 0000000010010000110010101101100000000000 FixedString VectorVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 10010000110010101101100011011000110111100000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 @@ -142,8 +135,6 @@ FixedString VectorVector 33 Hello\0\0\0\0\0 11011110000000000000000000000000000000000000000000000000000000000000000000000000 39 Hello\0\0\0\0\0 10000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 11001010110110000000000000000000000000000000000000000000000000000000000000000000 @@ -171,9 +162,6 @@ String VectorConst 7 Hello 001001000011001010110110001101100011011110000000 7 Hello 001001000011001010110110001101100011011110000000 7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 7 Hel 00100100001100101011011000000000 7 Hel 00100100001100101011011000000000 7 Hel 00100100001100101011011000000000 @@ -193,9 +181,6 @@ String VectorConst 8 Hello 010010000110010101101100011011000110111100000000 8 Hello 010010000110010101101100011011000110111100000000 8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 010010000110010101101100011011000110111100000000 8 Hel 01001000011001010110110000000000 8 Hel 01001000011001010110110000000000 8 Hel 01001000011001010110110000000000 @@ -217,9 +202,6 @@ FixedString VectorConst 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 @@ -239,15 +221,11 @@ FixedString VectorConst 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 String ConstVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 000000001001000011001010110110001101100011011110 7 Hello 001001000011001010110110001101100011011110000000 @@ -264,12 +242,9 @@ String ConstVector 33 Hello 00000000100100001100101011011000110110001101111000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello 7 Hello 001001000011001010110110001101100011011110000000 8 Hello 010010000110010101101100011011000110111100000000 9 Hello 00000000100100001100101011011000110110001101111000000000 --1 Hel 0 Hel 010010000110010101101100 1 Hel 00000000100100001100101011011000 7 Hel 00100100001100101011011000000000 @@ -280,20 +255,11 @@ String ConstVector 17 Hel 000000001001000011001010110110000000000000000000 23 Hel 001001000011001010110110000000000000000000000000 24 Hel -25 Hel -31 Hel -32 Hel -33 Hel -39 Hel -40 Hel -41 Hel -42 Hel 7 Hel 00100100001100101011011000000000 8 Hel 01001000011001010110110000000000 9 Hel 0000000010010000110010101101100000000000 FixedString ConstVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 10010000110010101101100011011000110111100000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 @@ -310,12 +276,9 @@ FixedString ConstVector 33 Hello\0\0\0\0\0 11011110000000000000000000000000000000000000000000000000000000000000000000000000 39 Hello\0\0\0\0\0 10000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 9 Hello\0\0\0\0\0 11001010110110001101100011011110000000000000000000000000000000000000000000000000 --1 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hel\0\0\0\0\0\0\0 01001000011001010110110000000000000000000000000000000000000000000000000000000000 1 Hel\0\0\0\0\0\0\0 10010000110010101101100000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 @@ -332,8 +295,6 @@ FixedString ConstVector 33 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 39 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 11001010110110000000000000000000000000000000000000000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql index 5c7a9901dae..a8e66eda281 100644 --- a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql +++ b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql @@ -41,8 +41,6 @@ SELECT 37,'Hello',bin(bitShiftLeft('Hello', 37)); SELECT 38,'Hello',bin(bitShiftLeft('Hello', 38)); SELECT 39,'Hello',bin(bitShiftLeft('Hello', 39)); SELECT 40,'Hello',bin(bitShiftLeft('Hello', 40)); -SELECT 41,'Hello',bin(bitShiftLeft('Hello', 41)); -SELECT 42,'Hello',bin(bitShiftLeft('Hello', 42)); SELECT 'FixedString ConstConst'; SELECT bin(toFixedString('Hello', 10)) == bin(bitShiftLeft(toFixedString('Hello', 10), 0)); @@ -93,40 +91,39 @@ SELECT 77,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10 SELECT 78,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 78)); SELECT 79,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 79)); SELECT 80,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 80)); -SELECT 81,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 81)); DROP TABLE IF EXISTS test_bit_shift_left_string_integer; CREATE TABLE test_bit_shift_left_string_integer (str String, fixedStr FixedString(10), id Int64) engine=Log; -INSERT INTO test_bit_shift_left_string_integer VALUES('Hello','Hello',-1)('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hello','Hello',41),('Hello','Hello',42),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); +INSERT INTO test_bit_shift_left_string_integer VALUES('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String VectorVector'; SELECT id as shift_right_bit,str as arg,bin(bitShiftLeft(str, id)) as string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,str as arg,bin(bitShiftLeft(str, id)) as string_res FROM test_bit_shift_left_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftLeft('Hello', 42)); +SELECT bin(bitShiftLeft('Hello', 40)); SELECT 'FixedString VectorVector'; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, id)) as fixed_string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, id)) as fixed_string_res FROM test_bit_shift_left_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String VectorConst'; SELECT 7 as shift_right_bit,str as arg,bin(bitShiftLeft(str, 7)) as string_res FROM test_bit_shift_left_string_integer; SELECT 8 as shift_right_bit,str as arg,bin(bitShiftLeft(str, 8)) as string_res FROM test_bit_shift_left_string_integer; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'FixedString VectorConst'; SELECT 7 as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, 7)) as fixed_string_res FROM test_bit_shift_left_string_integer; SELECT 8 as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, 8)) as fixed_string_res FROM test_bit_shift_left_string_integer; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String ConstVector'; SELECT id as shift_right_bit,'Hello' as arg,bin(bitShiftLeft('Hello', id)) as string_res FROM test_bit_shift_left_string_integer; -SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftLeft('Hel', id)) as string_res FROM test_bit_shift_left_string_integer; +SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftLeft('Hel', id)) as string_res FROM test_bit_shift_left_string_integer WHERE id <= 8 * 3; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'FixedString ConstVector'; SELECT id as shift_right_bit,toFixedString('Hello', 10) as arg,bin(bitShiftLeft(toFixedString('Hello', 10), id)) as fixed_string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,toFixedString('Hel', 10) as arg,bin(bitShiftLeft(toFixedString('Hel', 10), id)) as fixed_string_res FROM test_bit_shift_left_string_integer; diff --git a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh index ea703d69aa5..29bc28849c8 100755 --- a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh +++ b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh @@ -6,30 +6,26 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') for i in {1..20} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt', 'CSV');" ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" for i in {100..120} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt done ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" # touch does not change file content, no event -touch ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt +touch ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" ${CLICKHOUSE_CLIENT} --query "detach table file_log;" @@ -40,4 +36,4 @@ ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_ ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt diff --git a/tests/queries/0_stateless/02023_storage_filelog.sh b/tests/queries/0_stateless/02023_storage_filelog.sh index 51c8dc8ab3e..798ffe66ed9 100755 --- a/tests/queries/0_stateless/02023_storage_filelog.sh +++ b/tests/queries/0_stateless/02023_storage_filelog.sh @@ -6,57 +6,52 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* for i in {1..20} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" for i in {100..120} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done # touch does not change file content, no event -touch ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt +touch ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt -mv ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/j.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt +mv ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/j.txt -rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +rm ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" ${CLICKHOUSE_CLIENT} --query "detach table file_log;" -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/f.txt -mv ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/g.txt -mv ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/h.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/f.txt +mv ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/g.txt +mv ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/h.txt for i in {150..200} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/h.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/h.txt done for i in {200..250} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/i.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/i.txt done ${CLICKHOUSE_CLIENT} --query "attach table file_log;" @@ -68,11 +63,11 @@ ${CLICKHOUSE_CLIENT} --query "attach table file_log;" # should no records return ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" -truncate ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt --size 0 +truncate ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt --size 0 # exception happend ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" 2>&1 | grep -q "Code: 33" && echo 'OK' || echo 'FAIL' ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02024_storage_filelog_mv.sh b/tests/queries/0_stateless/02024_storage_filelog_mv.sh index 33c8693648c..a47ca6c7507 100755 --- a/tests/queries/0_stateless/02024_storage_filelog_mv.sh +++ b/tests/queries/0_stateless/02024_storage_filelog_mv.sh @@ -7,21 +7,16 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* for i in {1..20} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "drop table if exists mv;" ${CLICKHOUSE_CLIENT} --query "create Materialized View mv engine=MergeTree order by k as select * from file_log;" @@ -39,17 +34,17 @@ done ${CLICKHOUSE_CLIENT} --query "select * from mv order by k;" -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt # touch does not change file content, no event -touch ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt +touch ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt for i in {100..120} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt done while true; do @@ -62,4 +57,4 @@ ${CLICKHOUSE_CLIENT} --query "select * from mv order by k;" ${CLICKHOUSE_CLIENT} --query "drop table mv;" ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh index f027b61c3ef..7e414b8863e 100755 --- a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh +++ b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh @@ -6,42 +6,37 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* for i in {1..20} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt ${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" for i in {100..120} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done # touch does not change file content, no event -touch ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt +touch ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt -rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +rm ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt ${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" @@ -51,11 +46,11 @@ ${CLICKHOUSE_CLIENT} --query "attach table file_log;" # should no records return ${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" -truncate ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt --size 0 +truncate ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt --size 0 # exception happend ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" 2>&1 | grep -q "Code: 33" && echo 'OK' || echo 'FAIL' ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02026_storage_filelog_largefile.sh b/tests/queries/0_stateless/02026_storage_filelog_largefile.sh index b0a9a4357f3..b7377fb026a 100755 --- a/tests/queries/0_stateless/02026_storage_filelog_largefile.sh +++ b/tests/queries/0_stateless/02026_storage_filelog_largefile.sh @@ -7,34 +7,27 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* - -chmod 777 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +chmod 777 ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ for i in {1..10} do - ${CLICKHOUSE_CLIENT} --query "insert into function file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/test$i.csv', 'CSV', 'k UInt32, v UInt32') select number, number from numbers(10000);" + ${CLICKHOUSE_CLIENT} --query "insert into function file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/test$i.csv', 'CSV', 'k UInt32, v UInt32') select number, number from numbers(10000);" done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt32, v UInt32) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt32, v UInt32) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "select count() from file_log settings stream_like_engine_allow_direct_select=1;" for i in {11..20} do - ${CLICKHOUSE_CLIENT} --query "insert into function file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/test$i.csv', 'CSV', 'k UInt32, v UInt32') select number, number from numbers(10000);" + ${CLICKHOUSE_CLIENT} --query "insert into function file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/test$i.csv', 'CSV', 'k UInt32, v UInt32') select number, number from numbers(10000);" done ${CLICKHOUSE_CLIENT} --query "select count() from file_log settings stream_like_engine_allow_direct_select=1;" ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index 6fcfef23cc7..b34bdc9f670 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -1,17 +1,16 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-replicated-database +# Tags: no-fasttest, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') CAPN_PROTO_FILE=$USER_FILES_PATH/data.capnp touch $CAPN_PROTO_FILE -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES} CLIENT_SCHEMADIR=$CURDIR/format_schemas -SERVER_SCHEMADIR=test_02030 +SERVER_SCHEMADIR=${CLICKHOUSE_DATABASE} mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR cp -r $CLIENT_SCHEMADIR/02030_* $SCHEMADIR/$SERVER_SCHEMADIR/ diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh index eab44e74d88..afdb14c4191 100755 --- a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -5,10 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# See 01658_read_file_to_string_column.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -FILE_PATH="${user_files_path}/file" +FILE_PATH="${USER_FILES_PATH}/file" mkdir -p ${FILE_PATH} chmod 777 ${FILE_PATH} diff --git a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh index 7ea6739e932..25ad4f37e45 100755 --- a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh +++ b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -DATA_FILE=$CLICKHOUSE_TMP/test_02103_null.data +DATA_FILE=${CLICKHOUSE_TMP}/${CLICKHOUSE_DATABASE}_null.data # Wrapper for clickhouse-client to always output in JSONEachRow format, that # way format settings will not affect output. diff --git a/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh b/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh index a6e704093a2..07d40539358 100755 --- a/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh +++ b/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh @@ -1,20 +1,17 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$(clickhouse client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -DATA_FILE=$USER_FILES_PATH/test_02103.data +DATA_FILE=$USER_FILES_PATH/${CLICKHOUSE_DATABASE}.data FORMATS=('TSVWithNames' 'TSVWithNamesAndTypes' 'TSVRawWithNames' 'TSVRawWithNamesAndTypes' 'CSVWithNames' 'CSVWithNamesAndTypes' 'JSONCompactEachRowWithNames' 'JSONCompactEachRowWithNamesAndTypes') for format in "${FORMATS[@]}" do $CLICKHOUSE_CLIENT -q "SELECT number, range(number + 10) AS array, toString(number) AS string FROM numbers(10) FORMAT $format" > $DATA_FILE - $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103.data', '$format', 'number UInt64, array Array(UInt64), string String') ORDER BY number SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40" + $CLICKHOUSE_CLIENT -q "SELECT * FROM file('${CLICKHOUSE_DATABASE}.data', '$format', 'number UInt64, array Array(UInt64), string String') ORDER BY number SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40" done rm $DATA_FILE diff --git a/tests/queries/0_stateless/02104_json_strings_nullable_string.sh b/tests/queries/0_stateless/02104_json_strings_nullable_string.sh index b3b156b5787..d46b0704b0e 100755 --- a/tests/queries/0_stateless/02104_json_strings_nullable_string.sh +++ b/tests/queries/0_stateless/02104_json_strings_nullable_string.sh @@ -5,7 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE=$USER_FILES_PATH/test_02104_null.data echo -e '{"s" : "NULLSome string"}' > $DATA_FILE diff --git a/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh b/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh index c79b5d0eee5..38c68ca0005 100755 --- a/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh +++ b/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh @@ -5,13 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# See 01658_read_file_to_string_column.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p "${user_files_path}/" -chmod 777 ${user_files_path} - -FILE_PATH="${user_files_path}/test_table_function_file" +FILE_PATH="${USER_FILES_PATH}/test_table_function_file" function cleanup() { diff --git a/tests/queries/0_stateless/02115_map_contains.reference b/tests/queries/0_stateless/02115_map_contains.reference deleted file mode 100644 index 975e9876237..00000000000 --- a/tests/queries/0_stateless/02115_map_contains.reference +++ /dev/null @@ -1,4 +0,0 @@ -SELECT has(`m.keys`, \'a\') -FROM t_map_contains -1 -0 diff --git a/tests/queries/0_stateless/02115_map_contains_analyzer.reference b/tests/queries/0_stateless/02115_map_contains_analyzer.reference new file mode 100644 index 00000000000..7da5243e727 --- /dev/null +++ b/tests/queries/0_stateless/02115_map_contains_analyzer.reference @@ -0,0 +1,4 @@ +SELECT has(__table1.`m.keys`, \'a\') AS `mapContains(m, \'a\')` +FROM default.t_map_contains AS __table1 +1 +0 diff --git a/tests/queries/0_stateless/02115_map_contains.sql b/tests/queries/0_stateless/02115_map_contains_analyzer.sql similarity index 70% rename from tests/queries/0_stateless/02115_map_contains.sql rename to tests/queries/0_stateless/02115_map_contains_analyzer.sql index 3c7f21cb4f1..46e02eca4f0 100644 --- a/tests/queries/0_stateless/02115_map_contains.sql +++ b/tests/queries/0_stateless/02115_map_contains_analyzer.sql @@ -5,8 +5,9 @@ CREATE TABLE t_map_contains (m Map(String, UInt32)) ENGINE = Memory; INSERT INTO t_map_contains VALUES (map('a', 1, 'b', 2)), (map('c', 3, 'd', 4)); SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; -EXPLAIN SYNTAX SELECT mapContains(m, 'a') FROM t_map_contains; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT mapContains(m, 'a') FROM t_map_contains; SELECT mapContains(m, 'a') FROM t_map_contains; DROP TABLE t_map_contains; diff --git a/tests/queries/0_stateless/02115_write_buffers_finalize.sh b/tests/queries/0_stateless/02115_write_buffers_finalize.sh index d8a3c29bbbd..fb3a77f6105 100755 --- a/tests/queries/0_stateless/02115_write_buffers_finalize.sh +++ b/tests/queries/0_stateless/02115_write_buffers_finalize.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest # Tag no-fasttest: depends on brotli and bzip2 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02116_tuple_element.reference b/tests/queries/0_stateless/02116_tuple_element.reference deleted file mode 100644 index 121b08d02f1..00000000000 --- a/tests/queries/0_stateless/02116_tuple_element.reference +++ /dev/null @@ -1,25 +0,0 @@ -1 -SELECT `t1.a` -FROM t_tuple_element -a -SELECT `t1.s` -FROM t_tuple_element -1 -SELECT `t1.a` -FROM t_tuple_element -2 -SELECT `t2.1` -FROM t_tuple_element -2 -SELECT `t2.1` -FROM t_tuple_element -1 2 -WITH (1, 2) AS t -SELECT - t.1, - t.2 -1 2 -WITH CAST(\'(1, 2)\', \'Tuple(a UInt32, b UInt32)\') AS t -SELECT - t.1, - tupleElement(t, \'b\') diff --git a/tests/queries/0_stateless/02116_tuple_element_analyzer.reference b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference new file mode 100644 index 00000000000..22d48ffb2f3 --- /dev/null +++ b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference @@ -0,0 +1,25 @@ +1 +SELECT __table1.`t1.a` AS `tupleElement(t1, 1)` +FROM default.t_tuple_element AS __table1 +a +SELECT __table1.`t1.s` AS `tupleElement(t1, 2)` +FROM default.t_tuple_element AS __table1 +1 +SELECT __table1.`t1.a` AS `tupleElement(t1, \'a\')` +FROM default.t_tuple_element AS __table1 +2 +SELECT __table1.`t2.1` AS `tupleElement(t2, 1)` +FROM default.t_tuple_element AS __table1 +2 +SELECT __table1.`t2.1` AS `tupleElement(t2, 1)` +FROM default.t_tuple_element AS __table1 +1 2 +SELECT + _CAST(1, \'UInt8\') AS `tupleElement(t, 1)`, + _CAST(2, \'UInt8\') AS `tupleElement(t, 2)` +FROM system.one AS __table1 +1 2 +SELECT + _CAST(1, \'UInt32\') AS `tupleElement(t, 1)`, + _CAST(2, \'UInt32\') AS `tupleElement(t, \'b\')` +FROM system.one AS __table1 diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element_analyzer.sql similarity index 60% rename from tests/queries/0_stateless/02116_tuple_element.sql rename to tests/queries/0_stateless/02116_tuple_element_analyzer.sql index 64d9b9db331..5aeb72c9ee4 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element_analyzer.sql @@ -4,39 +4,40 @@ CREATE TABLE t_tuple_element(t1 Tuple(a UInt32, s String), t2 Tuple(UInt32, Stri INSERT INTO t_tuple_element VALUES ((1, 'a'), (2, 'b')); SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; SELECT t1.1 FROM t_tuple_element; -EXPLAIN SYNTAX SELECT t1.1 FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT t1.1 FROM t_tuple_element; SELECT tupleElement(t1, 2) FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t1, 2) FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t1, 2) FROM t_tuple_element; SELECT tupleElement(t1, 'a') FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t1, 'a') FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t1, 'a') FROM t_tuple_element; SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT t2.1 FROM t_tuple_element; -EXPLAIN SYNTAX SELECT t2.1 FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT t2.1 FROM t_tuple_element; SELECT tupleElement(t2, 1) FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t2, 1) FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t2, 1) FROM t_tuple_element; SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } DROP TABLE t_tuple_element; WITH (1, 2) AS t SELECT t.1, t.2; -EXPLAIN SYNTAX WITH (1, 2) AS t SELECT t.1, t.2; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 WITH (1, 2) AS t SELECT t.1, t.2; WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); -EXPLAIN SYNTAX WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); diff --git a/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh b/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh index 400bf2a56fa..d925c962da4 100755 --- a/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh +++ b/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02118_deserialize_whole_text.sh b/tests/queries/0_stateless/02118_deserialize_whole_text.sh index ccbfc5abe97..d544f1452a9 100755 --- a/tests/queries/0_stateless/02118_deserialize_whole_text.sh +++ b/tests/queries/0_stateless/02118_deserialize_whole_text.sh @@ -1,65 +1,65 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-replicated-database +# Tags: no-fasttest, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -DATA_FILE=$USER_FILES_PATH/data_02118 +DATA_FILE=$USER_FILES_PATH/${CLICKHOUSE_DATABASE} +FILE=${CLICKHOUSE_DATABASE} echo "[\"[1,2,3]trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Array(UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Array(UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"1970-01-02trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Date')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Date')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"1970-01-02trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Date32')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Date32')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"1970-01-01 03:00:01trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x DateTime')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x DateTime')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"1970-01-01 03:00:01.0000trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x DateTime64(4)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x DateTime64(4)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"42trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x UInt32')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x UInt32')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"42.4242trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Decimal32(4)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Decimal32(4)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"255.255.255.255trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "255.255.255.255trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv4')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'TSV', 'x IPv4')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' echo "255.255.255.255trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv4')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'CSV', 'x IPv4')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' echo "[\"255.255.255.255trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv6')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'TSV', 'x IPv6')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv6')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'CSV', 'x IPv6')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"{1:2, 2:3}trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Map(UInt32, UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Map(UInt32, UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"(1, 2)trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Tuple(UInt32, UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Tuple(UInt32, UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"ed9fd45d-6287-47c1-ad9f-d45d628767c1trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x UUID')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x UUID')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' rm $DATA_FILE diff --git a/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh b/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh index 0abf411d38f..4a169897520 100755 --- a/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh +++ b/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - DATA_FILE=$USER_FILES_PATH/test_02125.data echo "number=1" > $DATA_FILE diff --git a/tests/queries/0_stateless/02126_fix_filelog.sh b/tests/queries/0_stateless/02126_fix_filelog.sh index b266b582428..0e136a34c62 100755 --- a/tests/queries/0_stateless/02126_fix_filelog.sh +++ b/tests/queries/0_stateless/02126_fix_filelog.sh @@ -6,14 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" @@ -21,8 +15,8 @@ ${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=Fil ${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('/tmp/aaa.csv', 'CSV');" 2>&1 | grep -q "Code: 36" && echo 'OK' || echo 'FAIL'; ${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('/tmp/aaa.csv', 'CSV');" 2>&1 | grep -q "Code: 36" && echo 'OK' || echo 'FAIL'; -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02129_skip_quoted_fields.sh b/tests/queries/0_stateless/02129_skip_quoted_fields.sh index ac702d3c750..701d7a30b68 100755 --- a/tests/queries/0_stateless/02129_skip_quoted_fields.sh +++ b/tests/queries/0_stateless/02129_skip_quoted_fields.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02130_parse_quoted_null.sh b/tests/queries/0_stateless/02130_parse_quoted_null.sh index 0c72d0e85a7..44e6ee93599 100755 --- a/tests/queries/0_stateless/02130_parse_quoted_null.sh +++ b/tests/queries/0_stateless/02130_parse_quoted_null.sh @@ -1,14 +1,12 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -DATA_FILE=$USER_FILES_PATH/test_02130.data -SELECT_QUERY="select * from file('test_02130.data', 'CustomSeparated', 'x Nullable(Float64), y Nullable(UInt64)') settings input_format_parallel_parsing=0, format_custom_escaping_rule='Quoted'" +DATA_FILE=$USER_FILES_PATH/${CLICKHOUSE_DATABASE}.data +SELECT_QUERY="select * from file('${CLICKHOUSE_DATABASE}.data', 'CustomSeparated', 'x Nullable(Float64), y Nullable(UInt64)') settings input_format_parallel_parsing=0, format_custom_escaping_rule='Quoted'" $CLICKHOUSE_CLIENT -q "drop table if exists test_02130" diff --git a/tests/queries/0_stateless/02149_external_schema_inference.sh b/tests/queries/0_stateless/02149_external_schema_inference.sh index 41f8bfee2bc..edb4e915701 100755 --- a/tests/queries/0_stateless/02149_external_schema_inference.sh +++ b/tests/queries/0_stateless/02149_external_schema_inference.sh @@ -1,20 +1,19 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -FILE_NAME=test_$CLICKHOUSE_TEST_UNIQUE_NAME.data +FILE_NAME=test_${CLICKHOUSE_TEST_UNIQUE_NAME}_${CLICKHOUSE_DATABASE}.data DATA_FILE=$USER_FILES_PATH/$FILE_NAME touch $DATA_FILE -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('$FILE_NAME', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES} CLIENT_SCHEMADIR=$CURDIR/format_schemas -SERVER_SCHEMADIR=test_02149 +SERVER_SCHEMADIR=${CLICKHOUSE_DATABASE} mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR cp -r $CLIENT_SCHEMADIR/* $SCHEMADIR/$SERVER_SCHEMADIR/ diff --git a/tests/queries/0_stateless/02149_schema_inference.sh b/tests/queries/0_stateless/02149_schema_inference.sh index 856549f2215..fba1e6e9137 100755 --- a/tests/queries/0_stateless/02149_schema_inference.sh +++ b/tests/queries/0_stateless/02149_schema_inference.sh @@ -1,18 +1,16 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME=test_$CLICKHOUSE_TEST_UNIQUE_NAME.data -DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME - +DATA_FILE=${CLICKHOUSE_USER_FILES:?}/$FILE_NAME touch $DATA_FILE -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('$FILE_NAME', 'Template', 'val1 char') settings format_template_row='nonexist'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist)") +SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES} echo "TSV" diff --git a/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh b/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh index 8de2ab8c57a..bf247817323 100755 --- a/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh +++ b/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh @@ -1,14 +1,13 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir $USER_FILES_PATH/test_02149 -FILE_NAME=test_02149/data.Parquet +mkdir $USER_FILES_PATH/${CLICKHOUSE_DATABASE}/ +FILE_NAME=data.Parquet DATA_FILE=$USER_FILES_PATH/$FILE_NAME $CLICKHOUSE_CLIENT -q "select number as num, concat('Str: ', toString(number)) as str, [number, number + 1] as arr from numbers(10) format Parquet" > $DATA_FILE diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.reference b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.reference new file mode 100644 index 00000000000..d3d171221e8 --- /dev/null +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.reference @@ -0,0 +1,10 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh new file mode 100755 index 00000000000..6d715604d93 --- /dev/null +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# Tags: long, distributed, no-debug, no-tsan, no-msan, no-ubsan, no-asan, no-random-settings, no-random-merge-tree-settings + +# shellcheck disable=SC2154 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +opts=( + --join_algorithm='parallel_hash' +) + +$CLICKHOUSE_CLIENT -nq " + CREATE TABLE t1(a UInt32, b UInt32) ENGINE=MergeTree ORDER BY (); + INSERT INTO t1 SELECT number, number FROM numbers_mt(1e6); + + CREATE TABLE t2(a UInt32, b UInt32) ENGINE=MergeTree ORDER BY (); + INSERT INTO t2 SELECT number, number FROM numbers_mt(1e6); +" + +queries_without_preallocation=() +queries_with_preallocation=() + +run_new_query() { + query_id1="hash_table_sizes_stats_joins_$RANDOM$RANDOM" + # when we see a query for the first time we only collect it stats when execution ends. preallocation will happen only on the next run + queries_without_preallocation+=("$query_id1") + $CLICKHOUSE_CLIENT "${opts[@]}" --query_id="$query_id1" -q "$1" --format Null + + query_id2="hash_table_sizes_stats_joins_$RANDOM$RANDOM" + queries_with_preallocation+=("$query_id2") + $CLICKHOUSE_CLIENT "${opts[@]}" --query_id="$query_id2" -q "$1" --format Null +} + +run_new_query "SELECT * FROM t1 AS x INNER JOIN t2 AS y ON x.a = y.a" +# it only matters what columns from the right table are part of the join key, as soon as we change them - it is a new cache entry +run_new_query "SELECT * FROM t1 AS x INNER JOIN t2 AS y ON x.a = y.b" +run_new_query "SELECT * FROM t1 AS x INNER JOIN t2 AS y USING (a, b)" + +# we already had a join on t2.a, so cache should be populated +query_id="hash_table_sizes_stats_joins_$RANDOM$RANDOM" +queries_with_preallocation+=("$query_id") +$CLICKHOUSE_CLIENT "${opts[@]}" --query_id="$query_id" -q "SELECT * FROM t1 AS x INNER JOIN t2 AS y ON x.b = y.a" --format Null +# the same query with a different alias for the t2 +query_id="hash_table_sizes_stats_joins_$RANDOM$RANDOM" +queries_with_preallocation+=("$query_id") +$CLICKHOUSE_CLIENT "${opts[@]}" --query_id="$query_id" -q "SELECT * FROM t1 AS x INNER JOIN t2 AS z ON x.b = z.a" --format Null + +# now t1 is the right table +run_new_query "SELECT * FROM t2 AS x INNER JOIN t1 AS y ON x.a = y.a" + +################################## + +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" + +for i in "${!queries_without_preallocation[@]}"; do + $CLICKHOUSE_CLIENT --param_query_id="${queries_without_preallocation[$i]}" -q " + -- the old analyzer is not supported + SELECT sum(if(getSetting('allow_experimental_analyzer'), ProfileEvents['HashJoinPreallocatedElementsInHashTables'] = 0, 1)) + FROM system.query_log + WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase() AND type = 'QueryFinish' + " +done + +for i in "${!queries_with_preallocation[@]}"; do + $CLICKHOUSE_CLIENT --param_query_id="${queries_with_preallocation[$i]}" -q " + -- the old analyzer is not supported + SELECT sum(if(getSetting('allow_experimental_analyzer'), ProfileEvents['HashJoinPreallocatedElementsInHashTables'] > 0, 1)) + FROM system.query_log + WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase() AND type = 'QueryFinish' + " +done diff --git a/tests/queries/0_stateless/02167_format_from_file_extension.sh b/tests/queries/0_stateless/02167_format_from_file_extension.sh index 14985233524..0a0efff3228 100755 --- a/tests/queries/0_stateless/02167_format_from_file_extension.sh +++ b/tests/queries/0_stateless/02167_format_from_file_extension.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -7,28 +7,26 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function cleanup() { - # this command expects an error message like 'Code: 107. DB::Exception: Received <...> nonexist.txt doesn't exist. (FILE_DOESNT_EXIST)' - user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') - rm $user_files_path/test_02167.* + rm ${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}_test_02167.* } trap cleanup EXIT for format in TSV TabSeparated TSVWithNames TSVWithNamesAndTypes CSV Parquet ORC Arrow JSONEachRow JSONCompactEachRow CustomSeparatedWithNamesAndTypes do - $CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.$format', 'auto', 'x UInt64') select * from numbers(2)" - $CLICKHOUSE_CLIENT -q "select * from file('test_02167.$format')" - $CLICKHOUSE_CLIENT -q "select * from file('test_02167.$format', '$format')" + $CLICKHOUSE_CLIENT -q "insert into table function file('${CLICKHOUSE_DATABASE}_test_02167.$format', 'auto', 'x UInt64') select * from numbers(2)" + $CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.$format')" + $CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.$format', '$format')" done -$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.bin', 'auto', 'x UInt64') select * from numbers(2)" -$CLICKHOUSE_CLIENT -q "select * from file('test_02167.bin', 'auto', 'x UInt64')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02167.bin', 'RowBinary', 'x UInt64')" +$CLICKHOUSE_CLIENT -q "insert into table function file('${CLICKHOUSE_DATABASE}_test_02167.bin', 'auto', 'x UInt64') select * from numbers(2)" +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.bin', 'auto', 'x UInt64')" +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.bin', 'RowBinary', 'x UInt64')" -$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.ndjson', 'auto', 'x UInt64') select * from numbers(2)" -$CLICKHOUSE_CLIENT -q "select * from file('test_02167.ndjson')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02167.ndjson', 'JSONEachRow', 'x UInt64')" +$CLICKHOUSE_CLIENT -q "insert into table function file('${CLICKHOUSE_DATABASE}_test_02167.ndjson', 'auto', 'x UInt64') select * from numbers(2)" +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.ndjson')" +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.ndjson', 'JSONEachRow', 'x UInt64')" -$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.messagepack', 'auto', 'x UInt64') select * from numbers(2)" -$CLICKHOUSE_CLIENT -q "select * from file('test_02167.messagepack') settings input_format_msgpack_number_of_columns=1" -$CLICKHOUSE_CLIENT -q "select * from file('test_02167.messagepack', 'MsgPack', 'x UInt64')" +$CLICKHOUSE_CLIENT -q "insert into table function file('${CLICKHOUSE_DATABASE}_test_02167.messagepack', 'auto', 'x UInt64') select * from numbers(2)" +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.messagepack') settings input_format_msgpack_number_of_columns=1" +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.messagepack', 'MsgPack', 'x UInt64')" diff --git a/tests/queries/0_stateless/02185_orc_corrupted_file.sh b/tests/queries/0_stateless/02185_orc_corrupted_file.sh index 12510ae3836..8cf4334845d 100755 --- a/tests/queries/0_stateless/02185_orc_corrupted_file.sh +++ b/tests/queries/0_stateless/02185_orc_corrupted_file.sh @@ -5,7 +5,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') cp $CUR_DIR/data_orc/corrupted.orc $USER_FILES_PATH/ ${CLICKHOUSE_CLIENT} --query="select * from file('corrupted.orc')" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh index 0345a0e6394..dc3cb0de110 100755 --- a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh +++ b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -cp /etc/clickhouse-server/users.xml "$CURDIR"/users.xml +cp ${CLICKHOUSE_CONFIG_DIR}/users.xml "$CURDIR"/users.xml sed -i 's/<\/password>/c64c5e4e53ea1a9f1427d2713b3a22bbebe8940bc807adaf654744b1568c70ab<\/password_sha256_hex>/g' "$CURDIR"/users.xml sed -i 's//1<\/access_management>/g' "$CURDIR"/users.xml diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh index d49c3610852..73d1c2b9b42 100755 --- a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh +++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -q "insert into table function file(data.jsonl, 'JSONEachRow', 'x UInt32 default 42, y String') select number as x, 'String' as y from numbers(10)" $CLICKHOUSE_CLIENT -q "drop table if exists test" diff --git a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh index 9d87542d84d..d0e61541b15 100755 --- a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh +++ b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings, no-replicated-database +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings, no-replicated-database # set -x diff --git a/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh b/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh index 37fdde95ea7..344452767cc 100755 --- a/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh +++ b/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh @@ -6,9 +6,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# See 01658_read_file_to_string_column.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - function cleanup() { ${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS ${CURR_DATABASE}" @@ -18,7 +15,7 @@ trap cleanup EXIT export CURR_DATABASE="test_01889_sqllite_${CLICKHOUSE_DATABASE}" -DB_PATH=${user_files_path}/${CURR_DATABASE}_db1 +DB_PATH=${USER_FILES_PATH}/${CURR_DATABASE}_db1 ${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" DROP DATABASE IF EXISTS ${CURR_DATABASE}; diff --git a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql index ca1ee2738c7..72b6cc06f26 100644 --- a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql +++ b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql @@ -1,5 +1,5 @@ --- Tags: long, no-parallel, no-s3-storage --- no-s3-storage: Avoid flakiness due to cache / buffer usage +-- Tags: long, no-object-storage +-- no-object-storage: Avoid flakiness due to cache / buffer usage SET insert_keeper_fault_injection_probability=0; -- to succeed this test can require too many retries due to 100 partitions, so disable fault injections -- regression for MEMORY_LIMIT_EXCEEDED error because of deferred final part flush diff --git a/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql b/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql index 8bc75040e5a..48af5ae0031 100644 --- a/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql +++ b/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql index ee92931ec54..b791ee18e82 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +-- Tags: no-parallel, no-fasttest, no-object-storage, no-random-settings -- { echo } diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql index a609702f22a..40c80e04697 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.sql +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +-- Tags: no-parallel, no-fasttest, no-object-storage, no-random-settings -- { echo } diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh index 57b8cec7864..8faf0a08f1f 100755 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh index ce545a27317..d4a4e54acbd 100755 --- a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh +++ b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh - -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME=test_02240.data DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh index 1028fba76f5..f8e7b7e7e72 100755 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh index e03c62cfc5f..0f37bff45d6 100755 --- a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh +++ b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh @@ -6,7 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME=test_02242.data DATA_FILE=$USER_FILES_PATH/$FILE_NAME diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh index 7a665d81eab..fe016f5a27f 100755 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh index 8ff6e28b123..7bfeb747cc2 100755 --- a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh +++ b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh @@ -5,7 +5,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME=test_02245.parquet DATA_FILE=$USER_FILES_PATH/$FILE_NAME diff --git a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh index 233db7a534d..07c2a33c4d5 100755 --- a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh +++ b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh @@ -1,12 +1,11 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME=test_02149.data DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME diff --git a/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.sh b/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.sh index e8e3bf88ac4..3385f62af38 100755 --- a/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.sh +++ b/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.sh @@ -6,7 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME=test_02247.data DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME diff --git a/tests/queries/0_stateless/02247_read_bools_as_numbers_json.sh b/tests/queries/0_stateless/02247_read_bools_as_numbers_json.sh index 523b5934543..76133df2b37 100755 --- a/tests/queries/0_stateless/02247_read_bools_as_numbers_json.sh +++ b/tests/queries/0_stateless/02247_read_bools_as_numbers_json.sh @@ -1,13 +1,12 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -FILE_NAME=test_02247.data +FILE_NAME=${CLICKHOUSE_DATABASE}.data DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME touch $DATA_FILE diff --git a/tests/queries/0_stateless/02263_lazy_mark_load.sh b/tests/queries/0_stateless/02263_lazy_mark_load.sh index 5f80d9d7f6d..f1602e47e01 100755 --- a/tests/queries/0_stateless/02263_lazy_mark_load.sh +++ b/tests/queries/0_stateless/02263_lazy_mark_load.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-parallel +# Tags: no-object-storage, no-random-settings, no-parallel set -eo pipefail CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02270_errors_in_files.sh b/tests/queries/0_stateless/02270_errors_in_files.sh index 517547c6ef8..ab8bb28787d 100755 --- a/tests/queries/0_stateless/02270_errors_in_files.sh +++ b/tests/queries/0_stateless/02270_errors_in_files.sh @@ -13,8 +13,6 @@ echo "Error" > "${CLICKHOUSE_TMP}"/test_02270_2.csv ${CLICKHOUSE_LOCAL} --query "SELECT * FROM file('${CLICKHOUSE_TMP}/test_02270*.csv', CSV, 'a String, b String')" 2>&1 | grep -o "test_02270_2.csv" ${CLICKHOUSE_LOCAL} --query "SELECT * FROM file('${CLICKHOUSE_TMP}/test_02270*.csv', CSV, 'a String, b String')" --input_format_parallel_parsing 0 2>&1 | grep -o "test_02270_2.csv" -user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') - ${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE FUNCTION file('test_02270_1.csv') SELECT 'Hello', 'World'" ${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE FUNCTION file('test_02270_2.csv') SELECT 'Error'" @@ -27,9 +25,9 @@ ${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE FUNCTION file('test_02270_2.csv. ${CLICKHOUSE_CLIENT} --query "SELECT * FROM file('test_02270*.csv.gz', 'CSV', 'a String, b String')" 2>&1 | grep -o -m1 "test_02270_2.csv.gz" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM file('test_02270*.csv.gz', 'CSV', 'a String, b String')" --input_format_parallel_parsing 0 2>&1 | grep -o -m1 "test_02270_2.csv.gz" -rm "${CLICKHOUSE_TMP}"/test_02270_1.csv -rm "${CLICKHOUSE_TMP}"/test_02270_2.csv -rm "${user_files_path}"/test_02270_1.csv -rm "${user_files_path}"/test_02270_2.csv -rm "${user_files_path}"/test_02270_1.csv.gz -rm "${user_files_path}"/test_02270_2.csv.gz +rm -f "${CLICKHOUSE_TMP}"/test_02270_1.csv +rm -f "${CLICKHOUSE_TMP}"/test_02270_2.csv +rm -f "${USER_FILES_PATH}"/test_02270_1.csv +rm -f "${USER_FILES_PATH}"/test_02270_2.csv +rm -f "${USER_FILES_PATH}"/test_02270_1.csv.gz +rm -f "${USER_FILES_PATH}"/test_02270_2.csv.gz diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index a2c9352b7aa..32c9e9cb060 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh index 2f6167c3ddf..59528d97b93 100755 --- a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh +++ b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - cp $CURDIR/data_mysql_dump/dump*.sql $USER_FILES_PATH $CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), y Nullable(Int32)') order by x, y" diff --git a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql index f723284ad61..151ff275f7b 100644 --- a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql +++ b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql @@ -12,9 +12,9 @@ SELECT * FROM t_tuple_numeric FORMAT JSONEachRow; SELECT `t`.`1`.`2`, `t`.`1`.`3`, `t`.`4` FROM t_tuple_numeric; SELECT t.1.1, t.1.2, t.2 FROM t_tuple_numeric; -SELECT t.1.3 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK} -SELECT t.4 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK} -SELECT `t`.`1`.`1`, `t`.`1`.`2`, `t`.`2` FROM t_tuple_numeric; -- {serverError UNKNOWN_IDENTIFIER} +SELECT t.1.3 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK, ARGUMENT_OUT_OF_BOUND} +SELECT t.4 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK, ARGUMENT_OUT_OF_BOUND} +SELECT `t`.`1`.`1`, `t`.`1`.`2`, `t`.`2` FROM t_tuple_numeric; -- {serverError UNKNOWN_IDENTIFIER, ARGUMENT_OUT_OF_BOUND} DROP TABLE t_tuple_numeric; diff --git a/tests/queries/0_stateless/02293_formats_json_columns.sh b/tests/queries/0_stateless/02293_formats_json_columns.sh index 4eae5a1abb4..bf605d6f591 100755 --- a/tests/queries/0_stateless/02293_formats_json_columns.sh +++ b/tests/queries/0_stateless/02293_formats_json_columns.sh @@ -5,8 +5,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -USER_FILES_PATH=$(clickhouse client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - DATA_FILE=$USER_FILES_PATH/data_02293 $CLICKHOUSE_CLIENT -q "drop table if exists test_02293" diff --git a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql index 4406a05df0c..ecf4f9cab93 100644 --- a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql +++ b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql @@ -1,3 +1,5 @@ +set enable_named_columns_in_function_tuple = 0; + select arrayMap(x -> 2 * x, []); select toTypeName(arrayMap(x -> 2 * x, [])); select arrayMap((x, y) -> x + y, [], []); diff --git a/tests/queries/0_stateless/02297_regex_parsing_file_names.sh b/tests/queries/0_stateless/02297_regex_parsing_file_names.sh index 5973e24844a..666cb4d87fc 100755 --- a/tests/queries/0_stateless/02297_regex_parsing_file_names.sh +++ b/tests/queries/0_stateless/02297_regex_parsing_file_names.sh @@ -5,29 +5,21 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. +rm -rf ${USER_FILES_PATH}/file_{0..10}.csv -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -CLICKHOUSE_USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +echo '0' > ${USER_FILES_PATH}/file_0.csv +echo '0' > ${USER_FILES_PATH}/file_1.csv +echo '0' > ${USER_FILES_PATH}/file_2.csv +echo '0' > ${USER_FILES_PATH}/file_3.csv +echo '0' > ${USER_FILES_PATH}/file_4.csv +echo '0' > ${USER_FILES_PATH}/file_5.csv +echo '0' > ${USER_FILES_PATH}/file_6.csv +echo '0' > ${USER_FILES_PATH}/file_7.csv +echo '0' > ${USER_FILES_PATH}/file_8.csv +echo '0' > ${USER_FILES_PATH}/file_9.csv +echo '0' > ${USER_FILES_PATH}/file_10.csv -mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/ - -rm -rf ${CLICKHOUSE_USER_FILES_PATH}/file_{0..10}.csv - -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_0.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_1.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_2.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_3.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_4.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_5.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_6.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_7.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_8.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_9.csv -echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_10.csv - -# echo '' > ${CLICKHOUSE_USER_FILES_PATH}/file_10.csv +# echo '' > ${USER_FILES_PATH}/file_10.csv ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_regex;" @@ -36,5 +28,5 @@ ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_regex (id UInt64) ENGINE = MergeTree() o ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_regex SELECT * FROM file('file_{0..10}.csv','CSV');" ${CLICKHOUSE_CLIENT} -q "SELECT count() from t_regex;" -rm -rf ${CLICKHOUSE_USER_FILES_PATH}/file_{0..10}.csv; +rm -rf ${USER_FILES_PATH}/file_{0..10}.csv; ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_regex;" diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh index fbaec1ffaa7..b54e3d7f805 100755 --- a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh index 650faf6985e..89e5c827a48 100755 --- a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh +++ b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh @@ -5,10 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') touch $USER_FILES_PATH/data.capnp -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES} CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02327 mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR diff --git a/tests/queries/0_stateless/02336_sparse_columns_s3.sql b/tests/queries/0_stateless/02336_sparse_columns_s3.sql index bf4622adedc..1dc1e980846 100644 --- a/tests/queries/0_stateless/02336_sparse_columns_s3.sql +++ b/tests/queries/0_stateless/02336_sparse_columns_s3.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage +-- Tags: no-parallel, no-fasttest, no-object-storage DROP TABLE IF EXISTS t_sparse_s3; diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.sql b/tests/queries/0_stateless/02343_aggregation_pipeline.sql index d73ac66763e..0f9dbd0247d 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.sql +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage -- produces different pipeline if enabled set enable_memory_bound_merging_of_aggregation_results = 0; diff --git a/tests/queries/0_stateless/02350_views_max_insert_threads.sql b/tests/queries/0_stateless/02350_views_max_insert_threads.sql index 25e0fdeadba..a4f7e2546ed 100644 --- a/tests/queries/0_stateless/02350_views_max_insert_threads.sql +++ b/tests/queries/0_stateless/02350_views_max_insert_threads.sql @@ -10,7 +10,7 @@ create materialized view t_mv Engine = Null AS select now() as ts, max(a) from t insert into t select * from numbers_mt(10e6) settings max_threads = 16, max_insert_threads=16, max_block_size=100000; system flush logs; -select arrayUniq(thread_ids)>=16 from system.query_log where +select peak_threads_usage>=16 from system.query_log where event_date >= yesterday() and current_database = currentDatabase() and type = 'QueryFinish' and diff --git a/tests/queries/0_stateless/02353_compression_level.sh b/tests/queries/0_stateless/02353_compression_level.sh index 8d6a9c899ad..9e102d12fed 100755 --- a/tests/queries/0_stateless/02353_compression_level.sh +++ b/tests/queries/0_stateless/02353_compression_level.sh @@ -1,12 +1,11 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest # Tag no-fasttest: depends on brotli and bzip2 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') WORKING_FOLDER_02353="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}" rm -rf "${WORKING_FOLDER_02353}" diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index a5a3da82324..105fb500461 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -1,4 +1,4 @@ --- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-s3-storage +-- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-object-storage DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; diff --git a/tests/queries/0_stateless/02358_file_default_value.sh b/tests/queries/0_stateless/02358_file_default_value.sh index a7c4c17c129..0fd97a09546 100755 --- a/tests/queries/0_stateless/02358_file_default_value.sh +++ b/tests/queries/0_stateless/02358_file_default_value.sh @@ -4,7 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') WORKING_FOLDER_02357="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}" rm -rf "${WORKING_FOLDER_02357}" diff --git a/tests/queries/0_stateless/02360_clickhouse_local_config-option.sh b/tests/queries/0_stateless/02360_clickhouse_local_config-option.sh index b58cfd7ec21..50e07ca8612 100755 --- a/tests/queries/0_stateless/02360_clickhouse_local_config-option.sh +++ b/tests/queries/0_stateless/02360_clickhouse_local_config-option.sh @@ -15,7 +15,7 @@ echo " true - 9000 + ${CLICKHOUSE_PORT_TCP} ${SAFE_DIR} diff --git a/tests/queries/0_stateless/02361_fsync_profile_events.sh b/tests/queries/0_stateless/02361_fsync_profile_events.sh index e150d70b896..98c9cf9b7b4 100755 --- a/tests/queries/0_stateless/02361_fsync_profile_events.sh +++ b/tests/queries/0_stateless/02361_fsync_profile_events.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-merge-tree-settings -# Tag no-s3-storage: s3 does not have fsync +# Tags: no-object-storage, no-random-merge-tree-settings +# Tag no-object-storage: s3 does not have fsync CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02366_kql_func_binary.reference b/tests/queries/0_stateless/02366_kql_func_binary.reference index 6276cd6d867..360c1aa9899 100644 --- a/tests/queries/0_stateless/02366_kql_func_binary.reference +++ b/tests/queries/0_stateless/02366_kql_func_binary.reference @@ -1,7 +1,4 @@ -- binary functions 4 7 -1 -1 -1 7 3 1 diff --git a/tests/queries/0_stateless/02366_kql_func_binary.sql b/tests/queries/0_stateless/02366_kql_func_binary.sql index 824022b564c..687f3afb5ee 100644 --- a/tests/queries/0_stateless/02366_kql_func_binary.sql +++ b/tests/queries/0_stateless/02366_kql_func_binary.sql @@ -1,8 +1,5 @@ set dialect='kusto'; print ' -- binary functions'; print binary_and(4,7), binary_or(4,7); -print binary_shift_left(1, 1) == binary_shift_left(1, 65); -print binary_shift_right(2, 1) == binary_shift_right(2, 65); -print binary_shift_right(binary_shift_left(1, 65), 65) == 1; print binary_xor(2, 5), bitset_count_ones(42); print bitset_count_ones(binary_shift_left(binary_and(4,7), 1)); diff --git a/tests/queries/0_stateless/02372_data_race_in_avro.sh b/tests/queries/0_stateless/02372_data_race_in_avro.sh index 50a7ae1e3c5..49c34e31923 100755 --- a/tests/queries/0_stateless/02372_data_race_in_avro.sh +++ b/tests/queries/0_stateless/02372_data_race_in_avro.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02373_heap_buffer_overflow_in_avro.sh b/tests/queries/0_stateless/02373_heap_buffer_overflow_in_avro.sh index 3461287d28a..e95bda2adfb 100755 --- a/tests/queries/0_stateless/02373_heap_buffer_overflow_in_avro.sh +++ b/tests/queries/0_stateless/02373_heap_buffer_overflow_in_avro.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - cp $CURDIR/data_avro/corrupted.avro $USER_FILES_PATH/ $CLICKHOUSE_CLIENT -q "select * from file(corrupted.avro)" 2>&1 | grep -F -q "Cannot read compressed data" && echo "OK" || echo "FAIL" diff --git a/tests/queries/0_stateless/02378_analyzer_projection_names.reference b/tests/queries/0_stateless/02378_analyzer_projection_names.reference index fd5bc7d4ae8..532414f117c 100644 --- a/tests/queries/0_stateless/02378_analyzer_projection_names.reference +++ b/tests/queries/0_stateless/02378_analyzer_projection_names.reference @@ -443,6 +443,18 @@ SELECT '--'; -- DESCRIBE (WITH test_table_in_cte AS (SELECT id FROM test_table) SELECT id IN test_table_in_cte FROM test_table); in(id, test_table_in_cte) UInt8 +SELECT '--'; +-- +DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1) SELECT * +FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); +test_table_in_cte_1.c1 UInt8 +test_table_in_cte_2.c1 UInt8 +SELECT '--'; +-- +DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1 UNION ALL SELECT 1 AS c1) SELECT * +FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); +test_table_in_cte_1.c1 UInt8 +test_table_in_cte_2.c1 UInt8 SELECT 'Joins'; Joins DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2); diff --git a/tests/queries/0_stateless/02378_analyzer_projection_names.sql b/tests/queries/0_stateless/02378_analyzer_projection_names.sql index f5ac5f7476f..f41afe6a950 100644 --- a/tests/queries/0_stateless/02378_analyzer_projection_names.sql +++ b/tests/queries/0_stateless/02378_analyzer_projection_names.sql @@ -408,6 +408,16 @@ SELECT '--'; DESCRIBE (WITH test_table_in_cte AS (SELECT id FROM test_table) SELECT id IN test_table_in_cte FROM test_table); +SELECT '--'; + +DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1) SELECT * +FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); + +SELECT '--'; + +DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1 UNION ALL SELECT 1 AS c1) SELECT * +FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); + SELECT 'Joins'; DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2); diff --git a/tests/queries/0_stateless/02381_client_prints_server_side_time.sh b/tests/queries/0_stateless/02381_client_prints_server_side_time.sh index e6cd63da95d..81376ee3791 100755 --- a/tests/queries/0_stateless/02381_client_prints_server_side_time.sh +++ b/tests/queries/0_stateless/02381_client_prints_server_side_time.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-s3-storage +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh b/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh index 80743a97dd0..ae08941da63 100755 --- a/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh +++ b/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh @@ -5,7 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') UNIQ_DEST_PATH=$USER_FILES_PATH/test-02383-$RANDOM-$RANDOM mkdir -p $UNIQ_DEST_PATH diff --git a/tests/queries/0_stateless/02402_capnp_format_segments_overflow.sh b/tests/queries/0_stateless/02402_capnp_format_segments_overflow.sh index 8aad68ffe5c..3028451a5f5 100755 --- a/tests/queries/0_stateless/02402_capnp_format_segments_overflow.sh +++ b/tests/queries/0_stateless/02402_capnp_format_segments_overflow.sh @@ -5,11 +5,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') mkdir -p $USER_FILES_PATH/test_02402 cp $CURDIR/data_capnp/overflow.capnp $USER_FILES_PATH/test_02402/ -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('test_02402/overflow.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES} CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02402 diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index a152066a460..8dd8910c858 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -876,7 +876,6 @@ tryBase58Decode tumble tumbleEnd tumbleStart -tuple tupleConcat tupleDivide tupleDivideByNumber diff --git a/tests/queries/0_stateless/02421_record_errors_row_by_input_format.sh b/tests/queries/0_stateless/02421_record_errors_row_by_input_format.sh index df304eeeba5..72b55144348 100755 --- a/tests/queries/0_stateless/02421_record_errors_row_by_input_format.sh +++ b/tests/queries/0_stateless/02421_record_errors_row_by_input_format.sh @@ -9,10 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation. -CLICKHOUSE_USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/ -echo -e "1,1\n2,a\nb,3\n4,4\n5,c\n6,6" > ${CLICKHOUSE_USER_FILES_PATH}/a.csv +echo -e "1,1\n2,a\nb,3\n4,4\n5,c\n6,6" > ${USER_FILES_PATH}/a.csv ${CLICKHOUSE_CLIENT} --query "drop table if exists data;" ${CLICKHOUSE_CLIENT} --query "create table data (A UInt8, B UInt8) engine=MergeTree() order by A;" @@ -23,12 +20,12 @@ sleep 2 ${CLICKHOUSE_CLIENT} --query "select * except (time) from file('errors_server', 'CSV', 'time DateTime, database Nullable(String), table Nullable(String), offset UInt32, reason String, raw_data String');" # Client side -${CLICKHOUSE_CLIENT} --input_format_allow_errors_num 4 --input_format_record_errors_file_path "${CLICKHOUSE_USER_FILES_PATH}/errors_client" --query "insert into data(A, B) format CSV" < ${CLICKHOUSE_USER_FILES_PATH}/a.csv +${CLICKHOUSE_CLIENT} --input_format_allow_errors_num 4 --input_format_record_errors_file_path "${USER_FILES_PATH}/errors_client" --query "insert into data(A, B) format CSV" < ${USER_FILES_PATH}/a.csv sleep 2 ${CLICKHOUSE_CLIENT} --query "select * except (time) from file('errors_client', 'CSV', 'time DateTime, database Nullable(String), table Nullable(String), offset UInt32, reason String, raw_data String');" # Restore ${CLICKHOUSE_CLIENT} --query "drop table if exists data;" -rm ${CLICKHOUSE_USER_FILES_PATH}/a.csv -rm ${CLICKHOUSE_USER_FILES_PATH}/errors_server -rm ${CLICKHOUSE_USER_FILES_PATH}/errors_client +rm ${USER_FILES_PATH}/a.csv +rm ${USER_FILES_PATH}/errors_server +rm ${USER_FILES_PATH}/errors_client diff --git a/tests/queries/0_stateless/02422_allow_implicit_no_password.sh b/tests/queries/0_stateless/02422_allow_implicit_no_password.sh index 013c367e079..3c433856be2 100755 --- a/tests/queries/0_stateless/02422_allow_implicit_no_password.sh +++ b/tests/queries/0_stateless/02422_allow_implicit_no_password.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -cp /etc/clickhouse-server/users.xml "$CURDIR"/users.xml +cp ${CLICKHOUSE_CONFIG_DIR}/users.xml "$CURDIR"/users.xml sed -i 's/<\/password>/c64c5e4e53ea1a9f1427d2713b3a22bbebe8940bc807adaf654744b1568c70ab<\/password_sha256_hex>/g' "$CURDIR"/users.xml sed -i 's//1<\/access_management>/g' "$CURDIR"/users.xml diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference index bd3b689ca3c..d997b7ba830 100644 --- a/tests/queries/0_stateless/02447_drop_database_replica.reference +++ b/tests/queries/0_stateless/02447_drop_database_replica.reference @@ -12,11 +12,21 @@ t 2 rdb_default 1 1 s1 r1 1 2 -2 -2 +skip inactive s1 r1 OK 2 0 s1 r2 QUEUED 2 0 s2 r1 QUEUED 2 0 +s1 r1 OK 2 0 +s1 r2 QUEUED 2 0 +s2 r1 QUEUED 2 0 +timeout on active +2 +2 +s1 r1 OK 3 0 +s1 r2 QUEUED 3 0 +s2 r1 QUEUED 3 0 +s9 r9 QUEUED 3 0 +drop replica 2 rdb_default 1 1 s1 r1 1 rdb_default 1 2 s1 r2 0 @@ -24,6 +34,9 @@ rdb_default 1 2 s1 r2 0 2 t t2 +t22 t3 +t33 t4 +t44 rdb_default_4 1 1 s1 r1 1 diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh index 1604d527f2b..93a5fcee8e2 100755 --- a/tests/queries/0_stateless/02447_drop_database_replica.sh +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -33,10 +33,27 @@ $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_na $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it" # Also check that it doesn't exceed distributed_ddl_task_timeout waiting for inactive replicas -timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=none_only_active -q "create table $db.t2 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" -timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +echo 'skip inactive' +timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=none_only_active -q "create table $db.t2 (n int) engine=Log" +timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" | sort timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t4 (n int) engine=Log" | sort +# And that it still throws TIMEOUT_EXCEEDED for active replicas +echo 'timeout on active' +db9="${db}_9" +$CLICKHOUSE_CLIENT -q "create database $db9 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's9', 'r9')" +$CLICKHOUSE_CLIENT -q "detach database $db9" +$CLICKHOUSE_CLIENT -q "insert into system.zookeeper(name, path, value) values ('active', '/test/$CLICKHOUSE_DATABASE/rdb/replicas/s9|r9', '$($CLICKHOUSE_CLIENT -q "select serverUUID()")')" + +$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=none_only_active -q "create table $db.t22 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t33 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t44 (n int) engine=Log" | sort + +$CLICKHOUSE_CLIENT -q "attach database $db9" +$CLICKHOUSE_CLIENT -q "drop database $db9" + +echo 'drop replica' + $CLICKHOUSE_CLIENT -q "detach database $db3" $CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db" $CLICKHOUSE_CLIENT -q "attach database $db3" 2>/dev/null diff --git a/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql b/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql index 6cb1c0774aa..a2d46cf6d1b 100644 --- a/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql +++ b/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-replicated-database +-- Tags: no-object-storage, no-replicated-database DROP TABLE IF EXISTS test; diff --git a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh index 05de3f05562..7906f2917c4 100755 --- a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh +++ b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh @@ -5,11 +5,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') +cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $USER_FILES_PATH/${CLICKHOUSE_DATABASE}_10m_rows.csv.xz -cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $USER_FILES_PATH/ +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('${CLICKHOUSE_DATABASE}_10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings input_format_parallel_parsing=1, max_threads=1, max_parsing_threads=16, min_chunk_bytes_for_parallel_parsing=10485760, max_memory_usage=1000000000" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('${CLICKHOUSE_DATABASE}_10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings input_format_parallel_parsing=1, max_threads=1, max_parsing_threads=16, min_chunk_bytes_for_parallel_parsing=10485760, max_memory_usage=100000000" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings input_format_parallel_parsing=1, max_threads=1, max_parsing_threads=16, min_chunk_bytes_for_parallel_parsing=10485760, max_memory_usage=1000000000" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings input_format_parallel_parsing=1, max_threads=1, max_parsing_threads=16, min_chunk_bytes_for_parallel_parsing=10485760, max_memory_usage=100000000" - -rm $USER_FILES_PATH/10m_rows.csv.xz +rm $USER_FILES_PATH/${CLICKHOUSE_DATABASE}_10m_rows.csv.xz diff --git a/tests/queries/0_stateless/02457_bz2_concatenated.sh b/tests/queries/0_stateless/02457_bz2_concatenated.sh index 96e23cbfa2a..a9991cf44e7 100755 --- a/tests/queries/0_stateless/02457_bz2_concatenated.sh +++ b/tests/queries/0_stateless/02457_bz2_concatenated.sh @@ -6,7 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') WORKING_FOLDER_02457="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}" rm -rf "${WORKING_FOLDER_02457}" diff --git a/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh b/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh index b8430307ea3..b86385b72c4 100755 --- a/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh +++ b/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh @@ -5,28 +5,26 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir $user_files_path/d1 -touch $user_files_path/d1/text1.txt +mkdir $USER_FILES_PATH/d1 +touch $USER_FILES_PATH/d1/text1.txt for i in {1..2} do - echo $i$'\t'$i >> $user_files_path/d1/text1.txt + echo $i$'\t'$i >> $USER_FILES_PATH/d1/text1.txt done -mkdir $user_files_path/d1/d2 -touch $user_files_path/d1/d2/text2.txt +mkdir $USER_FILES_PATH/d1/d2 +touch $USER_FILES_PATH/d1/d2/text2.txt for i in {3..4} do - echo $i$'\t'$i >> $user_files_path/d1/d2/text2.txt + echo $i$'\t'$i >> $USER_FILES_PATH/d1/d2/text2.txt done -mkdir $user_files_path/d1/d2/d3 -touch $user_files_path/d1/d2/d3/text3.txt +mkdir $USER_FILES_PATH/d1/d2/d3 +touch $USER_FILES_PATH/d1/d2/d3/text3.txt for i in {5..6} do - echo $i$'\t'$i >> $user_files_path/d1/d2/d3/text3.txt + echo $i$'\t'$i >> $USER_FILES_PATH/d1/d2/d3/text3.txt done ${CLICKHOUSE_CLIENT} -q "SELECT * from file ('d1/*','TSV', 'Index UInt8, Number UInt8')" | sort --numeric-sort @@ -35,9 +33,9 @@ ${CLICKHOUSE_CLIENT} -q "SELECT * from file ('d1/*/tex*','TSV', 'Index UInt8, Nu ${CLICKHOUSE_CLIENT} -q "SELECT * from file ('d1/**/tex*','TSV', 'Index UInt8, Number UInt8')" | sort --numeric-sort -rm $user_files_path/d1/d2/d3/text3.txt -rmdir $user_files_path/d1/d2/d3 -rm $user_files_path/d1/d2/text2.txt -rmdir $user_files_path/d1/d2 -rm $user_files_path/d1/text1.txt -rmdir $user_files_path/d1 +rm $USER_FILES_PATH/d1/d2/d3/text3.txt +rmdir $USER_FILES_PATH/d1/d2/d3 +rm $USER_FILES_PATH/d1/d2/text2.txt +rmdir $USER_FILES_PATH/d1/d2 +rm $USER_FILES_PATH/d1/text1.txt +rmdir $USER_FILES_PATH/d1 diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.sh b/tests/queries/0_stateless/02475_bson_each_row_format.sh index 474a6cd0e47..2975b40c868 100755 --- a/tests/queries/0_stateless/02475_bson_each_row_format.sh +++ b/tests/queries/0_stateless/02475_bson_each_row_format.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-debug +# Tags: no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh index 6899b31d1d9..935ea03a947 100755 --- a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh +++ b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh @@ -38,7 +38,7 @@ ${CLICKHOUSE_CLIENT} -q "CREATE TABLE test_race_condition_landing (number Int64, export -f drop_mv; export -f insert; -TIMEOUT=55 +TIMEOUT=50 for i in {1..4} do diff --git a/tests/queries/0_stateless/02482_capnp_list_of_structs.sh b/tests/queries/0_stateless/02482_capnp_list_of_structs.sh index 9d78b9893dd..a04c631c411 100755 --- a/tests/queries/0_stateless/02482_capnp_list_of_structs.sh +++ b/tests/queries/0_stateless/02482_capnp_list_of_structs.sh @@ -5,10 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') touch $USER_FILES_PATH/data.capnp -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES} CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02482 mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR diff --git a/tests/queries/0_stateless/02483_capnp_decimals.sh b/tests/queries/0_stateless/02483_capnp_decimals.sh index ef545a5539f..bc19b63fc8b 100755 --- a/tests/queries/0_stateless/02483_capnp_decimals.sh +++ b/tests/queries/0_stateless/02483_capnp_decimals.sh @@ -5,19 +5,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -touch $USER_FILES_PATH/data.capnp +touch $CLICKHOUSE_USER_FILES/data.capnp -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02483 -mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR -cp -r $CLIENT_SCHEMADIR/02483_* $SCHEMADIR/$SERVER_SCHEMADIR/ +mkdir -p $CLICKHOUSE_SCHEMA_FILES/$SERVER_SCHEMADIR +cp -r $CLIENT_SCHEMADIR/02483_* $CLICKHOUSE_SCHEMA_FILES/$SERVER_SCHEMADIR/ $CLICKHOUSE_CLIENT -q "insert into function file(02483_data.capnp, auto, 'decimal32 Decimal32(3), decimal64 Decimal64(6)') select 42.42, 4242.424242 settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message', engine_file_truncate_on_insert=1" $CLICKHOUSE_CLIENT -q "select * from file(02483_data.capnp) settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message'" $CLICKHOUSE_CLIENT -q "select * from file(02483_data.capnp, auto, 'decimal64 Decimal64(6), decimal32 Decimal32(3)') settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message'" -rm $USER_FILES_PATH/data.capnp -rm $USER_FILES_PATH/02483_data.capnp +rm $CLICKHOUSE_USER_FILES/data.capnp +rm $CLICKHOUSE_USER_FILES/02483_data.capnp diff --git a/tests/queries/0_stateless/02494_query_cache_empty_tuple.reference b/tests/queries/0_stateless/02494_query_cache_empty_tuple.reference new file mode 100644 index 00000000000..50e44edaecb --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_empty_tuple.reference @@ -0,0 +1,2 @@ +() 0 +() 0 diff --git a/tests/queries/0_stateless/02494_query_cache_empty_tuple.sql b/tests/queries/0_stateless/02494_query_cache_empty_tuple.sql new file mode 100644 index 00000000000..8e133143ef8 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_empty_tuple.sql @@ -0,0 +1,2 @@ +SELECT tuple(), 0 FROM numbers(1) SETTINGS use_query_cache = true; +SELECT tuple(), 0 FROM numbers(1) SETTINGS use_query_cache = true; diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 4a4e898c5bd..77ef213b36d 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -332,12 +332,13 @@ SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it d Expression (Projection) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) - Filter (((WHERE + (Projection + Before ORDER BY)) + HAVING)) - Aggregating - Expression ((Before GROUP BY + Projection)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - ReadFromSystemNumbers + Filter ((WHERE + (Projection + Before ORDER BY))) + Filter (HAVING) + Aggregating + Expression ((Before GROUP BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromSystemNumbers -- execute 1 2 diff --git a/tests/queries/0_stateless/02497_trace_events_stress_long.sh b/tests/queries/0_stateless/02497_trace_events_stress_long.sh index c111ed40a29..dfd2f12b55b 100755 --- a/tests/queries/0_stateless/02497_trace_events_stress_long.sh +++ b/tests/queries/0_stateless/02497_trace_events_stress_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-tsan, no-asan, no-debug, no-s3-storage, no-fasttest, no-replicated-database +# Tags: long, no-parallel, no-tsan, no-asan, no-debug, no-object-storage, no-fasttest, no-replicated-database set -e diff --git a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh index 4f3fd0e54f6..5aeab4c746e 100755 --- a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +# Tags: no-parallel, no-fasttest, no-object-storage, no-random-settings CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh b/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh index d3a8743b880..e389cf410e8 100755 --- a/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh +++ b/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh @@ -1,18 +1,15 @@ #!/usr/bin/env bash - -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +mkdir -p ${USER_FILES_PATH:?}/${CLICKHOUSE_DATABASE} -mkdir -p $user_files_path/test_02504 - -cp $CURDIR/data_ua_parser/os.yaml ${user_files_path}/test_02504/ -cp $CURDIR/data_ua_parser/browser.yaml ${user_files_path}/test_02504/ -cp $CURDIR/data_ua_parser/device.yaml ${user_files_path}/test_02504/ +cp $CURDIR/data_ua_parser/os.yaml ${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/ +cp $CURDIR/data_ua_parser/browser.yaml ${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/ +cp $CURDIR/data_ua_parser/device.yaml ${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/ $CLICKHOUSE_CLIENT -n --query=" drop dictionary if exists regexp_os; @@ -29,7 +26,7 @@ create dictionary regexp_os os_v4_replacement String default '0' ) PRIMARY KEY(regex) -SOURCE(YAMLRegExpTree(PATH '${user_files_path}/test_02504/os.yaml')) +SOURCE(YAMLRegExpTree(PATH '${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/os.yaml')) LIFETIME(0) LAYOUT(regexp_tree); @@ -41,7 +38,7 @@ create dictionary regexp_browser v2_replacement String default '0' ) PRIMARY KEY(regex) -SOURCE(YAMLRegExpTree(PATH '${user_files_path}/test_02504/browser.yaml')) +SOURCE(YAMLRegExpTree(PATH '${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/browser.yaml')) LIFETIME(0) LAYOUT(regexp_tree); @@ -53,7 +50,7 @@ create dictionary regexp_device model_replacement String ) PRIMARY KEY(regex) -SOURCE(YAMLRegExpTree(PATH '${user_files_path}/test_02504/device.yaml')) +SOURCE(YAMLRegExpTree(PATH '${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/device.yaml')) LIFETIME(0) LAYOUT(regexp_tree); @@ -84,4 +81,4 @@ drop dictionary if exists regexp_device; drop table if exists user_agents; " -rm -rf "$user_files_path/test_02504" +rm -rf ${USER_FILES_PATH:?}/${CLICKHOUSE_DATABASE} diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh index 7211372f2f7..68a87a14320 100755 --- a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh +++ b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh @@ -1,13 +1,10 @@ #!/usr/bin/env bash - # Tags: use-vectorscan, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - mkdir -p $USER_FILES_PATH/test_02504 yaml=$USER_FILES_PATH/test_02504/test.yaml diff --git a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql index 55723360c38..b4d31e234d8 100644 --- a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql +++ b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage +-- Tags: long, no-object-storage SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index 4afc6d4496b..71f42e2a6db 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-debug -# - no-s3-storage - S3 has additional logging +# Tags: no-object-storage, no-debug +# - no-object-storage - S3 has additional logging # - no-debug - debug builds also has additional logging CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference deleted file mode 100644 index 8d0f56ba185..00000000000 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference +++ /dev/null @@ -1,173 +0,0 @@ -query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='sipHash64(x)' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -Hello -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -Hello -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -Hello -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -Hello -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -Hello -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -Hello -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -Hello -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -Hello -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -Hello -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -Hello -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -Hello -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -Hello -query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='y' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -1 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh deleted file mode 100755 index dccb680be42..00000000000 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-parallel, long - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -function run_with_custom_key { - echo "query='$1' with custom_key='$2'" - for prefer_localhost_replica in 0 1; do - for filter_type in 'default' 'range'; do - for max_replicas in {1..3}; do - echo "filter_type='$filter_type' max_replicas=$max_replicas prefer_localhost_replica=$prefer_localhost_replica" - query="$1 SETTINGS max_parallel_replicas=$max_replicas\ - , parallel_replicas_custom_key='$2'\ - , parallel_replicas_custom_key_filter_type='$filter_type'\ - , prefer_localhost_replica=$prefer_localhost_replica" - $CLICKHOUSE_CLIENT --query="$query" - done - done - done -} - -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key"; - -$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String) ENGINE = MergeTree ORDER BY x"; -$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key VALUES ('Hello')"; - -run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)" "sipHash64(x)" - -$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" - -$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String, y UInt32) ENGINE = MergeTree ORDER BY cityHash64(x)" -$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key SELECT toString(number), number % 3 FROM numbers(1000)" - -function run_count_with_custom_key { - run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y" "$1" -} - -run_count_with_custom_key "y" -run_count_with_custom_key "cityHash64(y)" -run_count_with_custom_key "cityHash64(y) + 1" - -$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) as t1 JOIN 02535_custom_key USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" - -$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.reference new file mode 100644 index 00000000000..1bb07f0d916 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.reference @@ -0,0 +1,177 @@ +query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt)' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT * FROM 02535_custom_key_mt' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +1 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.sh new file mode 100755 index 00000000000..fad43ea9070 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# Tags: no-parallel, long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function run_with_custom_key { + echo "query='$1' with custom_key='$2'" + for filter_type in 'default' 'range'; do + for max_replicas in {1..3}; do + echo "filter_type='$filter_type' max_replicas=$max_replicas" + query="$1 SETTINGS max_parallel_replicas=$max_replicas\ +, parallel_replicas_custom_key='$2'\ +, parallel_replicas_custom_key_filter_type='$filter_type'\ +, parallel_replicas_for_non_replicated_merge_tree=1 \ +, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'" + $CLICKHOUSE_CLIENT --query="$query" + done + done +} + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_mt"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_mt (x String) ENGINE = MergeTree ORDER BY x"; +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_mt VALUES ('Hello')"; + +run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt)" "sipHash64(x)" +run_with_custom_key "SELECT * FROM 02535_custom_key_mt" "sipHash64(x)" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_mt" + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_mt (x String, y UInt32) ENGINE = MergeTree ORDER BY cityHash64(x)" +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_mt SELECT toString(number), number % 3 FROM numbers(1000)" + +function run_count_with_custom_key_distributed { + run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key_distributed "y" +run_count_with_custom_key_distributed "cityHash64(y)" +run_count_with_custom_key_distributed "cityHash64(y) + 1" + +function run_count_with_custom_key_merge_tree { + run_with_custom_key "SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key_merge_tree "y" +run_count_with_custom_key_merge_tree "cityHash64(y)" +run_count_with_custom_key_merge_tree "cityHash64(y) + 1" + +$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) as t1 JOIN 02535_custom_key_mt USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_mt" diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.reference new file mode 100644 index 00000000000..c6526b506d3 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.reference @@ -0,0 +1,177 @@ +query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt)' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT * FROM 02535_custom_key_rmt' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +1 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.sh new file mode 100755 index 00000000000..6350f5027f9 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# Tags: no-parallel, long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function run_with_custom_key { + echo "query='$1' with custom_key='$2'" + for filter_type in 'default' 'range'; do + for max_replicas in {1..3}; do + echo "filter_type='$filter_type' max_replicas=$max_replicas" + query="$1 SETTINGS max_parallel_replicas=$max_replicas\ +, parallel_replicas_custom_key='$2'\ +, parallel_replicas_custom_key_filter_type='$filter_type'\ +, parallel_replicas_for_non_replicated_merge_tree=1 \ +, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'" + $CLICKHOUSE_CLIENT --query="$query" + done + done +} + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_rmt"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_rmt (x String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_02535', 'r1') ORDER BY x"; +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_rmt VALUES ('Hello')"; + +run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt)" "sipHash64(x)" +run_with_custom_key "SELECT * FROM 02535_custom_key_rmt" "sipHash64(x)" + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_rmt_hash"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_rmt_hash (x String, y UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_02535_hash', 'r1') ORDER BY cityHash64(x)" +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_rmt_hash SELECT toString(number), number % 3 FROM numbers(1000)" + +function run_count_with_custom_key { + run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key "y" +run_count_with_custom_key "cityHash64(y)" +run_count_with_custom_key "cityHash64(y) + 1" + +function run_count_with_custom_key_merge_tree { + run_with_custom_key "SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key_merge_tree "y" +run_count_with_custom_key_merge_tree "cityHash64(y)" +run_count_with_custom_key_merge_tree "cityHash64(y) + 1" + +$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) as t1 JOIN 02535_custom_key_rmt_hash USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_rmt_hash" diff --git a/tests/queries/0_stateless/02541_tuple_element_with_null.sql b/tests/queries/0_stateless/02541_tuple_element_with_null.sql index d2062b60d49..e1581ce3755 100644 --- a/tests/queries/0_stateless/02541_tuple_element_with_null.sql +++ b/tests/queries/0_stateless/02541_tuple_element_with_null.sql @@ -9,7 +9,7 @@ SETTINGS index_granularity = 8192; INSERT INTO test_tuple_element VALUES (tuple(1,2)), (tuple(NULL, 3)); -SELECT +SELECT tupleElement(tuple, 'k1', 0) fine_k1_with_0, tupleElement(tuple, 'k1', NULL) k1_with_null, tupleElement(tuple, 'k2', 0) k2_with_0, diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference index 70bcd7f255b..9bb0c022752 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference @@ -29,16 +29,20 @@ WHERE type_1 = \'all\' ExpressionTransform × 2 (Filter) FilterTransform × 2 - (Aggregating) - ExpressionTransform × 2 - AggregatingTransform × 2 - Copy 1 → 2 - (Expression) - ExpressionTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Filter) + FilterTransform × 2 + (Filter) + FilterTransform × 2 + (Aggregating) + ExpressionTransform × 2 + AggregatingTransform × 2 + Copy 1 → 2 + (Expression) + ExpressionTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Filter) @@ -64,10 +68,14 @@ ExpressionTransform × 2 ExpressionTransform × 2 AggregatingTransform × 2 Copy 1 → 2 - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Filter) + FilterTransform + (Filter) + FilterTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Aggregating) diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql index 0891f1aa8a2..f926b9037d2 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_grouping_sets_predicate; diff --git a/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql b/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql index 785fb10f70b..361305bac6d 100644 --- a/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql +++ b/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage drop table if exists tvm; create table tvm (c0 UInt64, c1 UInt64, c2 UInt64, c3 UInt64, c4 UInt64, c5 UInt64, c6 UInt64, c7 UInt64, c8 UInt64, c9 UInt64, c10 UInt64, c11 UInt64, c12 UInt64, c13 UInt64, c14 UInt64, c15 UInt64, c16 UInt64, c17 UInt64, c18 UInt64, c19 UInt64, c20 UInt64, c21 UInt64, c22 UInt64, c23 UInt64, c24 UInt64, c25 UInt64, c26 UInt64, c27 UInt64, c28 UInt64, c29 UInt64, c30 UInt64, c31 UInt64, c32 UInt64, c33 UInt64, c34 UInt64, c35 UInt64, c36 UInt64, c37 UInt64, c38 UInt64, c39 UInt64, c40 UInt64, c41 UInt64, c42 UInt64, c43 UInt64, c44 UInt64, c45 UInt64, c46 UInt64, c47 UInt64, c48 UInt64, c49 UInt64, c50 UInt64, c51 UInt64, c52 UInt64, c53 UInt64, c54 UInt64, c55 UInt64, c56 UInt64, c57 UInt64, c58 UInt64, c59 UInt64, c60 UInt64, c61 UInt64, c62 UInt64, c63 UInt64, c64 UInt64, c65 UInt64, c66 UInt64, c67 UInt64, c68 UInt64, c69 UInt64, c70 UInt64, c71 UInt64, c72 UInt64, c73 UInt64, c74 UInt64, c75 UInt64, c76 UInt64, c77 UInt64, c78 UInt64, c79 UInt64, c80 UInt64, c81 UInt64, c82 UInt64, c83 UInt64, c84 UInt64, c85 UInt64, c86 UInt64, c87 UInt64, c88 UInt64, c89 UInt64, c90 UInt64, c91 UInt64, c92 UInt64, c93 UInt64, c94 UInt64, c95 UInt64, c96 UInt64, c97 UInt64, c98 UInt64, c99 UInt64, c100 UInt64, c101 UInt64, c102 UInt64, c103 UInt64, c104 UInt64, c105 UInt64, c106 UInt64, c107 UInt64, c108 UInt64, c109 UInt64, c110 UInt64, c111 UInt64, c112 UInt64, c113 UInt64, c114 UInt64, c115 UInt64, c116 UInt64, c117 UInt64, c118 UInt64, c119 UInt64, c120 UInt64, c121 UInt64, c122 UInt64, c123 UInt64, c124 UInt64, c125 UInt64, c126 UInt64, c127 UInt64, c128 UInt64, c129 UInt64, c130 UInt64, c131 UInt64, c132 UInt64, c133 UInt64, c134 UInt64, c135 UInt64, c136 UInt64, c137 UInt64, c138 UInt64, c139 UInt64, c140 UInt64, c141 UInt64, c142 UInt64, c143 UInt64, c144 UInt64, c145 UInt64, c146 UInt64, c147 UInt64, c148 UInt64, c149 UInt64, c150 UInt64, c151 UInt64, c152 UInt64, c153 UInt64, c154 UInt64, c155 UInt64, c156 UInt64, c157 UInt64, c158 UInt64, c159 UInt64, c160 UInt64, c161 UInt64, c162 UInt64, c163 UInt64, c164 UInt64, c165 UInt64, c166 UInt64, c167 UInt64, c168 UInt64, c169 UInt64, c170 UInt64, c171 UInt64, c172 UInt64, c173 UInt64, c174 UInt64, c175 UInt64, c176 UInt64, c177 UInt64, c178 UInt64, c179 UInt64, c180 UInt64, c181 UInt64, c182 UInt64, c183 UInt64, c184 UInt64, c185 UInt64, c186 UInt64, c187 UInt64, c188 UInt64, c189 UInt64, c190 UInt64, c191 UInt64, c192 UInt64, c193 UInt64, c194 UInt64, c195 UInt64, c196 UInt64, c197 UInt64, c198 UInt64, c199 UInt64, c200 UInt64, c201 UInt64, c202 UInt64, c203 UInt64, c204 UInt64, c205 UInt64, c206 UInt64, c207 UInt64, c208 UInt64, c209 UInt64, c210 UInt64, c211 UInt64, c212 UInt64, c213 UInt64, c214 UInt64, c215 UInt64, c216 UInt64, c217 UInt64, c218 UInt64, c219 UInt64, c220 UInt64, c221 UInt64, c222 UInt64, c223 UInt64, c224 UInt64, c225 UInt64, c226 UInt64, c227 UInt64, c228 UInt64, c229 UInt64, c230 UInt64, c231 UInt64, c232 UInt64, c233 UInt64, c234 UInt64, c235 UInt64, c236 UInt64, c237 UInt64, c238 UInt64, c239 UInt64, c240 UInt64, c241 UInt64, c242 UInt64, c243 UInt64, c244 UInt64, c245 UInt64, c246 UInt64, c247 UInt64, c248 UInt64, c249 UInt64, c250 UInt64, c251 UInt64, c252 UInt64, c253 UInt64, c254 UInt64, c255 UInt64, c256 UInt64, c257 UInt64, c258 UInt64, c259 UInt64, c260 UInt64, c261 UInt64, c262 UInt64, c263 UInt64, c264 UInt64, c265 UInt64, c266 UInt64, c267 UInt64, c268 UInt64, c269 UInt64, c270 UInt64, c271 UInt64, c272 UInt64, c273 UInt64, c274 UInt64, c275 UInt64, c276 UInt64, c277 UInt64, c278 UInt64, c279 UInt64, c280 UInt64, c281 UInt64, c282 UInt64, c283 UInt64, c284 UInt64, c285 UInt64, c286 UInt64, c287 UInt64, c288 UInt64, c289 UInt64, c290 UInt64, c291 UInt64, c292 UInt64, c293 UInt64, c294 UInt64, c295 UInt64, c296 UInt64, c297 UInt64, c298 UInt64, c299 UInt64) engine = MergeTree order by tuple() settings min_rows_for_wide_part = 10, min_bytes_for_wide_part=0, vertical_merge_algorithm_min_rows_to_activate=1; diff --git a/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql b/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql index cb6b1b6083e..406cab82183 100644 --- a/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql +++ b/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02661_read_from_archive.lib b/tests/queries/0_stateless/02661_read_from_archive.lib index 908b6bd38d2..56f1a0f163c 100644 --- a/tests/queries/0_stateless/02661_read_from_archive.lib +++ b/tests/queries/0_stateless/02661_read_from_archive.lib @@ -6,10 +6,10 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh function read_archive_file() { - $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2" - $CLICKHOUSE_CLIENT --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2" - $CLICKHOUSE_CLIENT --query "DESC file('${user_files_path}/$1')" - $CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${user_files_path}/$1')" + $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${USER_FILES_PATH}/$1') ORDER BY 1, 2" + $CLICKHOUSE_CLIENT --query "SELECT * FROM file('${USER_FILES_PATH}/$1') ORDER BY 1, 2" + $CLICKHOUSE_CLIENT --query "DESC file('${USER_FILES_PATH}/$1')" + $CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${USER_FILES_PATH}/$1')" $CLICKHOUSE_CLIENT --query "SELECT * FROM 02661_archive_table ORDER BY 1, 2" $CLICKHOUSE_CLIENT --query "DROP TABLE 02661_archive_table" } @@ -20,16 +20,14 @@ function run_archive_test() { extension_without_dot=$(echo $1 | sed -e 's/\.//g') FILE_PREFIX="02661_read_from_archive_${CLICKHOUSE_DATABASE}_$extension_without_dot" - user_files_path=$(clickhouse-client --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -o "/[^[:space:]]*nonexist.txt" | awk '{gsub("/nonexist.txt","",$1); print $1}') - touch ${FILE_PREFIX}_data0.csv echo -e "1,2\n3,4" > ${FILE_PREFIX}_data1.csv echo -e "5,6\n7,8" > ${FILE_PREFIX}_data2.csv echo -e "9,10\n11,12" > ${FILE_PREFIX}_data3.csv - eval "$2 ${user_files_path}/${FILE_PREFIX}_archive1.$1 ${FILE_PREFIX}_data0.csv ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data2.csv > /dev/null" - eval "$2 ${user_files_path}/${FILE_PREFIX}_archive2.$1 ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data3.csv > /dev/null" - eval "$2 ${user_files_path}/${FILE_PREFIX}_archive3.$1 ${FILE_PREFIX}_data2.csv ${FILE_PREFIX}_data3.csv > /dev/null" + eval "$2 ${USER_FILES_PATH}/${FILE_PREFIX}_archive1.$1 ${FILE_PREFIX}_data0.csv ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data2.csv > /dev/null" + eval "$2 ${USER_FILES_PATH}/${FILE_PREFIX}_archive2.$1 ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data3.csv > /dev/null" + eval "$2 ${USER_FILES_PATH}/${FILE_PREFIX}_archive3.$1 ${FILE_PREFIX}_data2.csv ${FILE_PREFIX}_data3.csv > /dev/null" echo "archive1 data1.csv" read_archive_file "${FILE_PREFIX}_archive1.$1 :: ${FILE_PREFIX}_data1.csv" @@ -44,10 +42,10 @@ function run_archive_test() { echo "archive* {2..3}.csv" read_archive_file "${FILE_PREFIX}_archive*.$1 :: ${FILE_PREFIX}_data{2..3}.csv" - $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" - $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" + $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${USER_FILES_PATH}/${FILE_PREFIX}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" + $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${USER_FILES_PATH}/${FILE_PREFIX}_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" - rm ${user_files_path}/${FILE_PREFIX}_archive{1..3}.$1 + rm ${USER_FILES_PATH}/${FILE_PREFIX}_archive{1..3}.$1 rm ${FILE_PREFIX}_data{0..3}.csv } diff --git a/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh index 17d1fa92377..3e629ece33f 100755 --- a/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh +++ b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-ordinary-database, zookeeper, no-fasttest, no-parallel +# Tags: no-ordinary-database, zookeeper, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -37,7 +37,7 @@ function create_drop_loop() export -f create_drop_loop; THREADS=10 -TIMEOUT=30 +TIMEOUT=20 for i in `seq $THREADS` do diff --git a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh index c78cd202f1b..6f43c1ae869 100755 --- a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-random-merge-tree-settings +# Tags: no-object-storage, no-random-settings, no-random-merge-tree-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh index 31cf6e9606e..4f6a300c5b3 100755 --- a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage +# Tags: no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh index 748bf856deb..8cb03a93a7a 100755 --- a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh +++ b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-random-merge-tree-settings +# Tags: no-object-storage, no-random-settings, no-random-merge-tree-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index 374dd246c96..2d0ff256c95 100755 --- a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -5,12 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# see 01658_read_file_to_stringcolumn.sh -CLICKHOUSE_USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - # Prepare data unique_name=${CLICKHOUSE_TEST_UNIQUE_NAME} -user_files_tmp_dir=${CLICKHOUSE_USER_FILES_PATH}/${unique_name} +user_files_tmp_dir=${USER_FILES_PATH}/${unique_name} mkdir -p ${user_files_tmp_dir}/tmp/ echo '"id","str","int","text"' > ${user_files_tmp_dir}/tmp.csv echo '1,"abc",123,"abacaba"' >> ${user_files_tmp_dir}/tmp.csv diff --git a/tests/queries/0_stateless/02724_decompress_filename_exception.sh b/tests/queries/0_stateless/02724_decompress_filename_exception.sh index e413910b934..8b5a2f23aa9 100755 --- a/tests/queries/0_stateless/02724_decompress_filename_exception.sh +++ b/tests/queries/0_stateless/02724_decompress_filename_exception.sh @@ -5,7 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILENAME="${USER_FILES_PATH}/corrupted_file.tsv.xx" echo 'corrupted file' > $FILENAME; diff --git a/tests/queries/0_stateless/02725_memory-for-merges.sql b/tests/queries/0_stateless/02725_memory-for-merges.sql index 1a8402dff4b..8e4d4f5b3e0 100644 --- a/tests/queries/0_stateless/02725_memory-for-merges.sql +++ b/tests/queries/0_stateless/02725_memory-for-merges.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings -- We allocate a lot of memory for buffers when reading or writing to S3 DROP TABLE IF EXISTS 02725_memory_for_merges SYNC; diff --git a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh index eef52002e36..78659b70129 100755 --- a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh +++ b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-s3-storage +# Tags: no-fasttest, no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02732_rename_after_processing.sh b/tests/queries/0_stateless/02732_rename_after_processing.sh index 9d44ff9fc34..c3f2274570e 100755 --- a/tests/queries/0_stateless/02732_rename_after_processing.sh +++ b/tests/queries/0_stateless/02732_rename_after_processing.sh @@ -4,12 +4,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# see 01658_read_file_to_stringcolumn.sh -CLICKHOUSE_USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - # Prepare data unique_name=${CLICKHOUSE_TEST_UNIQUE_NAME} -tmp_dir=${CLICKHOUSE_USER_FILES_PATH}/${unique_name} +tmp_dir=${USER_FILES_PATH}/${unique_name} mkdir -p $tmp_dir rm -rf ${tmp_dir:?}/* diff --git a/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql b/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql index 6b2961f0555..91e8624057c 100644 --- a/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql +++ b/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql @@ -10,7 +10,7 @@ DROP TABLE IF EXISTS t1; CREATE TABLE t0 (vkey UInt32, pkey UInt32, c0 UInt32) engine = TinyLog; CREATE TABLE t1 (vkey UInt32) ENGINE = AggregatingMergeTree ORDER BY vkey; INSERT INTO t0 VALUES (15, 25000, 58); -SELECT ref_5.pkey AS c_2_c2392_6 FROM t0 AS ref_5 WHERE 'J[' < multiIf(ref_5.pkey IN ( SELECT 1 ), bitShiftLeft(multiIf(ref_5.c0 > NULL, '1', ')'), 40), NULL); +SELECT ref_5.pkey AS c_2_c2392_6 FROM t0 AS ref_5 WHERE 'J[' < multiIf(ref_5.pkey IN ( SELECT 1 ), bitShiftLeft(multiIf(ref_5.c0 > NULL, '1', ')'), 40), NULL); -- { serverError ARGUMENT_OUT_OF_BOUND } DROP TABLE t0; DROP TABLE t1; diff --git a/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh b/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh index 932837b83db..60df3ed2762 100755 --- a/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh +++ b/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh @@ -7,27 +7,22 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by using the file function, we can also get it by this query: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +rm -rf ${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta/non_existing.csv', CSV);" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo 'OK' || echo 'FAIL' +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta/non_existing.csv', CSV);" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo 'OK' || echo 'FAIL' # Create two files in different directories -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir1/subdir11/ -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir2/subdir22/ +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir1/subdir11/ +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir2/subdir22/ -echo 'This is file data1' > ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir1/subdir11/data1.csv -echo 'This is file data2' > ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir2/subdir22/data2.csv +echo 'This is file data1' > ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir1/subdir11/data1.csv +echo 'This is file data2' > ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir2/subdir22/data2.csv -${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta1.csv', CSV);" -${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta2.csv', CSV);" +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta1.csv', CSV);" +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta2.csv', CSV);" -${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data1.csv';" -${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data2.csv';" +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data1.csv';" +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data2.csv';" -rm -rf ${user_files_path:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh index 2b9e5296a05..20b02bcba32 100755 --- a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh +++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh @@ -14,14 +14,11 @@ function run_test_for_disk() echo "$disk" - clickhouse-disks -C "$config" --disk "$disk" write --input "$config" $CLICKHOUSE_DATABASE/test - clickhouse-disks -C "$config" --log-level test --disk "$disk" copy $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy |& { + clickhouse-disks -C "$config" --disk "$disk" --query "write --path-from $config $CLICKHOUSE_DATABASE/test" + clickhouse-disks -C "$config" --log-level test --disk "$disk" --query "copy -r $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { grep -o -e "Single part upload has completed." -e "Single operation copy has completed." } - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test - # NOTE: this is due to "copy" does works like "cp -R from to/" instead of "cp from to" - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy/test - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy + clickhouse-disks -C "$config" --disk "$disk" --query "remove -r $CLICKHOUSE_DATABASE/test" } function run_test_copy_from_s3_to_s3(){ @@ -29,13 +26,12 @@ function run_test_copy_from_s3_to_s3(){ local disk_dest=$1 && shift echo "copy from $disk_src to $disk_dest" - clickhouse-disks -C "$config" --disk "$disk_src" write --input "$config" $CLICKHOUSE_DATABASE/test + clickhouse-disks -C "$config" --disk "$disk_src" --query "write --path-from $config $CLICKHOUSE_DATABASE/test" - clickhouse-disks -C "$config" --log-level test copy --disk-from "$disk_src" --disk-to "$disk_dest" $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy |& { + clickhouse-disks -C "$config" --log-level test --query "copy -r --disk-from $disk_src --disk-to $disk_dest $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { grep -o -e "Single part upload has completed." -e "Single operation copy has completed." } - clickhouse-disks -C "$config" --disk "$disk_dest" remove $CLICKHOUSE_DATABASE/test.copy/test - clickhouse-disks -C "$config" --disk "$disk_dest" remove $CLICKHOUSE_DATABASE/test.copy + clickhouse-disks -C "$config" --disk "$disk_dest" --query "remove -r $CLICKHOUSE_DATABASE/test.copy" } run_test_for_disk s3_plain_native_copy diff --git a/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 b/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 index eee236ff681..1ca5cc0bb7e 100644 --- a/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 +++ b/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 @@ -1,5 +1,5 @@ --- Tags: no-s3-storage --- Tag: no-s3-storage because S3 updates metadata for the virtual link file on metadata disk (see CreateHardlinkOperation::execute() for details) +-- Tags: no-object-storage +-- Tag: no-object-storage because S3 updates metadata for the virtual link file on metadata disk (see CreateHardlinkOperation::execute() for details) set mutations_sync=1; diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh index b44f9e50513..8a4a2e906b0 100755 --- a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh +++ b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh index a3b0d17f1be..87fbffdb1e6 100755 --- a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh +++ b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh @@ -1,10 +1,12 @@ #!/usr/bin/env bash +# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh SESSION_ID_PREFIX="02832_alter_max_sessions_session_$$" +QUERY_ID_PREFIX="02832_alter_max_sessions_query_$$" PROFILE="02832_alter_max_sessions_profile_$$" USER="02832_alter_max_sessions_user_$$" USER2="02832_alter_max_sessions_user_two_$$" @@ -15,6 +17,26 @@ ${CLICKHOUSE_CLIENT} -q $"DROP PROFILE IF EXISTS ${PROFILE}" ${CLICKHOUSE_CLIENT} -q $"CREATE SETTINGS PROFILE ${PROFILE}" ${CLICKHOUSE_CLIENT} -q $"CREATE USER '${USER}' SETTINGS PROFILE '${PROFILE}'" +function run_sessions_set() +{ + local sessions_count="$1" + local session_check="$2" + for ((i = 1 ; i <= ${sessions_count} ; i++)); do + local session_id="${SESSION_ID_PREFIX}_${i}" + local query_id="${QUERY_ID_PREFIX}_${i}" + # Write only expected error text + # More than alter_sessions_count queries will not start. + ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&query_id=${query_id}&session_id=${session_id}&session_check=${session_check}&session_timeout=600&function_sleep_max_microseconds_per_block=120000000" --data-binary "SELECT sleep(120)" | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' & + done + + for ((i = 1 ; i <= ${sessions_count} ; i++)); do + local query_id="${QUERY_ID_PREFIX}_${i}" + $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE query_id='$query_id' SYNC" >/dev/null + done + + wait +} + function test_alter_profile() { local max_session_count="$1" @@ -24,23 +46,13 @@ function test_alter_profile() ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${max_session_count}" # Create sessions with $max_session_count restriction - for ((i = 1 ; i <= ${max_session_count} ; i++)); do - local session_id="${SESSION_ID_PREFIX}_${i}" - # Skip output from this query - ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=0" --data-binary "SELECT 1" > /dev/null - done + run_sessions_set $max_session_count 0 # Update restriction to $alter_sessions_count ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${alter_sessions_count}" # Simultaneous sessions should use max settings from profile ($alter_sessions_count) - for ((i = 1 ; i <= ${max_session_count} ; i++)); do - local session_id="${SESSION_ID_PREFIX}_${i}" - # ignore select 1, we need only errors - ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=1" --data-binary "select sleep(0.3)" | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' & - done - - wait + run_sessions_set $max_session_count 1 } test_alter_profile 1 1 diff --git a/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql index 3a751294cba..da2f050cf38 100644 --- a/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql +++ b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-random-settings, no-random-merge-tree-settings, no-s3-storage +-- Tags: no-parallel, no-random-settings, no-random-merge-tree-settings, no-object-storage drop table if exists t_multi_prewhere; drop row policy if exists policy_02834 on t_multi_prewhere; diff --git a/tests/queries/0_stateless/02834_apache_arrow_abort.sql b/tests/queries/0_stateless/02834_apache_arrow_abort.sql index 47db46f1e43..47e1c5d3951 100644 --- a/tests/queries/0_stateless/02834_apache_arrow_abort.sql +++ b/tests/queries/0_stateless/02834_apache_arrow_abort.sql @@ -1,4 +1,4 @@ -- Tags: no-fasttest -- This tests depends on internet access, but it does not matter, because it only has to check that there is no abort due to a bug in Apache Arrow library. - -INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION } +SET optimize_trivial_insert_select=1; +INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION } diff --git a/tests/queries/0_stateless/02841_not_ready_set_constraints.reference b/tests/queries/0_stateless/02841_not_ready_set_constraints.reference new file mode 100644 index 00000000000..daaac9e3030 --- /dev/null +++ b/tests/queries/0_stateless/02841_not_ready_set_constraints.reference @@ -0,0 +1,2 @@ +42 +42 diff --git a/tests/queries/0_stateless/02841_not_ready_set_constraints.sql b/tests/queries/0_stateless/02841_not_ready_set_constraints.sql new file mode 100644 index 00000000000..274940f50a3 --- /dev/null +++ b/tests/queries/0_stateless/02841_not_ready_set_constraints.sql @@ -0,0 +1,43 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 ( + `id` UInt64 +) +ENGINE = MergeTree ORDER BY id; + +INSERT INTO t1(id) VALUES (42); + +CREATE TABLE t2 ( + `conversation` UInt64, + CONSTRAINT constraint_conversation CHECK conversation IN (SELECT id FROM t1) +) +ENGINE = MergeTree ORDER BY conversation; + +INSERT INTO t2(conversation) VALUES (42); + +select * from t2; + +drop table t1; + +INSERT INTO t2(conversation) VALUES (42); -- { serverError UNKNOWN_TABLE } + +drop table t2; + +CREATE TABLE t2 ( + `conversation` UInt64, + CONSTRAINT constraint_conversation CHECK conversation IN (SELECT id FROM t1) +) +ENGINE = MergeTree ORDER BY conversation; + +INSERT INTO t2(conversation) VALUES (42); -- { serverError UNKNOWN_TABLE } + +CREATE TABLE t1 ( + `id` UInt64 +) +ENGINE = MergeTree ORDER BY id; + +INSERT INTO t1(id) VALUES (42); + +INSERT INTO t2(conversation) VALUES (42); +select * from t2; diff --git a/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh index edfed206d87..07d2ee27d22 100755 --- a/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh +++ b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-s3-storage +# Tags: zookeeper, no-object-storage # Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) diff --git a/tests/queries/0_stateless/02864_replace_regexp_string_fallback.reference b/tests/queries/0_stateless/02864_replace_regexp_string_fallback.reference new file mode 100644 index 00000000000..dd52d49eea3 --- /dev/null +++ b/tests/queries/0_stateless/02864_replace_regexp_string_fallback.reference @@ -0,0 +1 @@ +Hello l x Hexlo Hexxo diff --git a/tests/queries/0_stateless/02864_replace_regexp_string_fallback.sql b/tests/queries/0_stateless/02864_replace_regexp_string_fallback.sql new file mode 100644 index 00000000000..917c11fe8dd --- /dev/null +++ b/tests/queries/0_stateless/02864_replace_regexp_string_fallback.sql @@ -0,0 +1,11 @@ +-- Tests functions replaceRegexpAll and replaceRegexpOne with trivial patterns. These trigger internally a fallback to simple string replacement. + +-- _materialize_ because the shortcut is only implemented for non-const haystack + const needle + const replacement strings + +SELECT 'Hello' AS haystack, 'l' AS needle, 'x' AS replacement, replaceRegexpOne(materialize(haystack), needle, replacement), replaceRegexpAll(materialize(haystack), needle, replacement); + +-- negative tests + +-- Even if the fallback is used, invalid substitutions must throw an exception. +SELECT 'Hello' AS haystack, 'l' AS needle, '\\1' AS replacement, replaceRegexpOne(materialize(haystack), needle, replacement); -- { serverError BAD_ARGUMENTS } +SELECT 'Hello' AS haystack, 'l' AS needle, '\\1' AS replacement, replaceRegexpAll(materialize(haystack), needle, replacement); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02864_statistics_ddl.reference b/tests/queries/0_stateless/02864_statistics_ddl.reference new file mode 100644 index 00000000000..a7ff5caa0b0 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_ddl.reference @@ -0,0 +1,31 @@ +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After insert + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +10 +0 +After drop statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) +10 +CREATE TABLE default.tab\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After add statistic +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After materialize statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +20 +After merge + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +20 +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After rename + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) +20 diff --git a/tests/queries/0_stateless/02864_statistics_ddl.sql b/tests/queries/0_stateless/02864_statistics_ddl.sql new file mode 100644 index 00000000000..fe612efe2ac --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_ddl.sql @@ -0,0 +1,59 @@ +-- Tests that various DDL statements create/drop/materialize statistics + +DROP TABLE IF EXISTS tab; + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +CREATE TABLE tab +( + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest), + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; + +SHOW CREATE TABLE tab; + +INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; +SELECT count(*) FROM tab WHERE b < NULL and a < '10'; + +ALTER TABLE tab DROP STATISTICS a, b; + +SELECT 'After drop statistic'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +SHOW CREATE TABLE tab; + +ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest; + +SELECT 'After add statistic'; + +SHOW CREATE TABLE tab; + +ALTER TABLE tab MATERIALIZE STATISTICS a, b; +INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'After materialize statistic'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +OPTIMIZE TABLE tab FINAL; + +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +ALTER TABLE tab RENAME COLUMN b TO c; +SHOW CREATE TABLE tab; + +SELECT 'After rename'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE c < 10 and a < 10; + +DROP TABLE IF EXISTS tab; diff --git a/tests/queries/0_stateless/02864_statistics_exception.sql b/tests/queries/0_stateless/02864_statistics_exception.sql index c531d39cd69..289ffee6600 100644 --- a/tests/queries/0_stateless/02864_statistics_exception.sql +++ b/tests/queries/0_stateless/02864_statistics_exception.sql @@ -1,57 +1,55 @@ -DROP TABLE IF EXISTS t1; +-- Tests creating/dropping/materializing statistics produces the right exceptions. -CREATE TABLE t1 +DROP TABLE IF EXISTS tab; + +-- Can't create statistics when allow_experimental_statistics = 0 +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a Float64 STATISTICS(tdigest) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } SET allow_experimental_statistics = 1; -CREATE TABLE t1 +-- The same type of statistics can't exist more than once on a column +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest), - b Int64, - pk String STATISTICS(tdigest), -) Engine = MergeTree() ORDER BY pk; -- { serverError ILLEGAL_STATISTICS } + a Float64 STATISTICS(tdigest, tdigest) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } -CREATE TABLE t1 +-- Unknown statistics types are rejected +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest, tdigest(10)), - b Int64, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a Float64 STATISTICS(no_statistics_type) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } -CREATE TABLE t1 +-- tDigest statistics can only be created on numeric columns +CREATE TABLE tab ( - a Float64 STATISTICS(xyz), - b Int64, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a String STATISTICS(tdigest), +) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE t1 +CREATE TABLE tab ( a Float64, - b Int64, - pk String, -) Engine = MergeTree() ORDER BY pk; + b String +) Engine = MergeTree() ORDER BY tuple(); -ALTER TABLE t1 ADD STATISTICS a TYPE xyz; -- { serverError INCORRECT_QUERY } -ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS IF NOT EXISTS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS a TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS a TYPE tdigest; +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; -- Statistics can be created only on integer columns -ALTER TABLE t1 MODIFY STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS pk TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 DROP STATISTICS a; -ALTER TABLE t1 DROP STATISTICS IF EXISTS a; -ALTER TABLE t1 CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 CLEAR STATISTICS IF EXISTS a; -ALTER TABLE t1 MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS b TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS a; +ALTER TABLE tab DROP STATISTICS IF EXISTS a; +ALTER TABLE tab CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab CLEAR STATISTICS IF EXISTS a; +ALTER TABLE tab MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS b TYPE tdigest; -ALTER TABLE t1 MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; -ALTER TABLE t1 MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; +ALTER TABLE tab MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; +ALTER TABLE tab MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } -DROP TABLE t1; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/03164_materialize_statistics.reference b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference similarity index 100% rename from tests/queries/0_stateless/03164_materialize_statistics.reference rename to tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql new file mode 100644 index 00000000000..3e15ec1148e --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql @@ -0,0 +1,52 @@ +-- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0'). + +DROP TABLE IF EXISTS tab; + +SET allow_experimental_analyzer = 1; +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +SET materialize_statistics_on_insert = 0; + +CREATE TABLE tab +( + a Int64 STATISTICS(tdigest), + b Int16 STATISTICS(tdigest), +) ENGINE = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. + +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; + +OPTIMIZE TABLE tab FINAL; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; + +TRUNCATE TABLE tab; +SET mutations_sync = 2; + +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; +ALTER TABLE tab MATERIALIZE STATISTICS a, b; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; + +DROP TABLE tab; + +SYSTEM FLUSH LOGS; + +SELECT log_comment, message FROM system.text_log JOIN +( + SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log + WHERE current_database = currentDatabase() + AND query LIKE 'SELECT count(*) FROM tab%' + AND type = 'QueryFinish' +) AS query_log USING (query_id) +WHERE message LIKE '%moved to PREWHERE%' +ORDER BY event_time_microseconds; + +SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) +FROM system.query_log +WHERE current_database = currentDatabase() + AND query LIKE 'INSERT INTO tab SELECT%' + AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/02864_statistics_operate.reference b/tests/queries/0_stateless/02864_statistics_operate.reference deleted file mode 100644 index 6398a9bd000..00000000000 --- a/tests/queries/0_stateless/02864_statistics_operate.reference +++ /dev/null @@ -1,31 +0,0 @@ -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -10 -0 -After drop statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) -10 -CREATE TABLE default.t1\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After add statistic -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After materialize statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After rename - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) -20 diff --git a/tests/queries/0_stateless/02864_statistics_operate.sql b/tests/queries/0_stateless/02864_statistics_operate.sql deleted file mode 100644 index bf69c11bc91..00000000000 --- a/tests/queries/0_stateless/02864_statistics_operate.sql +++ /dev/null @@ -1,57 +0,0 @@ -DROP TABLE IF EXISTS t1; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; - -CREATE TABLE t1 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; - -SHOW CREATE TABLE t1; - -INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After insert'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; -SELECT count(*) FROM t1 WHERE b < NULL and a < '10'; - -ALTER TABLE t1 DROP STATISTICS a, b; - -SELECT 'After drop statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -SHOW CREATE TABLE t1; - -ALTER TABLE t1 ADD STATISTICS a, b TYPE tdigest; - -SELECT 'After add statistic'; - -SHOW CREATE TABLE t1; - -ALTER TABLE t1 MATERIALIZE STATISTICS a, b; -INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After materialize statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -OPTIMIZE TABLE t1 FINAL; - -SELECT 'After merge'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -ALTER TABLE t1 RENAME COLUMN b TO c; -SHOW CREATE TABLE t1; - -SELECT 'After rename'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE c < 10 and a < 10; - -DROP TABLE IF EXISTS t1; diff --git a/tests/queries/0_stateless/02889_file_log_save_errors.sh b/tests/queries/0_stateless/02889_file_log_save_errors.sh index 8ef7816d57d..cf7ced0bd08 100755 --- a/tests/queries/0_stateless/02889_file_log_save_errors.sh +++ b/tests/queries/0_stateless/02889_file_log_save_errors.sh @@ -4,27 +4,25 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" ${CLICKHOUSE_CLIENT} --query "drop table if exists log_errors;" -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* for i in {0..9} do - echo "{\"key\" : $i, \"value\" : $i}" >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.jsonl - echo "Error $i" >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.jsonl + echo "{\"key\" : $i, \"value\" : $i}" >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.jsonl + echo "Error $i" >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.jsonl done for i in {10..19} do - echo "{\"key\" : $i, \"value\" : $i}" >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.jsonl - echo "Error $i" >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.jsonl + echo "{\"key\" : $i, \"value\" : $i}" >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.jsonl + echo "Error $i" >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.jsonl done -${CLICKHOUSE_CLIENT} --query "create table file_log(key UInt8, value UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'JSONEachRow') settings handle_error_mode='stream';" +${CLICKHOUSE_CLIENT} --query "create table file_log(key UInt8, value UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'JSONEachRow') settings handle_error_mode='stream';" ${CLICKHOUSE_CLIENT} --query "create Materialized View log_errors engine=MergeTree order by tuple() as select _error as error, _raw_record as record, _filename as file from file_log where not isNull(_error);" function count() @@ -42,4 +40,4 @@ ${CLICKHOUSE_CLIENT} --query "select * from log_errors order by file, record;" ${CLICKHOUSE_CLIENT} --query "drop table file_log;" ${CLICKHOUSE_CLIENT} --query "drop table log_errors;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02890_named_tuple_functions.reference b/tests/queries/0_stateless/02890_named_tuple_functions.reference new file mode 100644 index 00000000000..f7a0c440b5a --- /dev/null +++ b/tests/queries/0_stateless/02890_named_tuple_functions.reference @@ -0,0 +1,9 @@ +Tuple(\n i Int32,\n j Int32) +['i','j'] +Tuple(UInt8, Int32) +['1','2'] +Tuple(\n k UInt8,\n j Int32) +['k','j'] +Tuple(Int32, Int32, Int32, Int32) +['1','2','3','4'] +(1,2,3) diff --git a/tests/queries/0_stateless/02890_named_tuple_functions.sql b/tests/queries/0_stateless/02890_named_tuple_functions.sql new file mode 100644 index 00000000000..8e0c9c2b10e --- /dev/null +++ b/tests/queries/0_stateless/02890_named_tuple_functions.sql @@ -0,0 +1,31 @@ +set enable_named_columns_in_function_tuple = 1; +set allow_experimental_analyzer = 1; + +drop table if exists x; +create table x (i int, j int) engine MergeTree order by i; +insert into x values (1, 2); + +select toTypeName(tuple(i, j)) from x; +select tupleNames(tuple(i, j)) from x; + +select toTypeName(tuple(1, j)) from x; +select tupleNames(tuple(1, j)) from x; + +select toTypeName(tuple(1 as k, j)) from x; +select tupleNames(tuple(1 as k, j)) from x; + +select toTypeName(tuple(i, i, j, j)) from x; +select tupleNames(tuple(i, i, j, j)) from x; + +select tupleNames(1); -- { serverError 43 } + +drop table x; + +drop table if exists tbl; + +-- Make sure named tuple won't break Values insert +create table tbl (x Tuple(a Int32, b Int32, c Int32)) engine MergeTree order by (); +insert into tbl values (tuple(1, 2, 3)); -- without tuple it's interpreted differently inside values block. +select * from tbl; + +drop table tbl diff --git a/tests/queries/0_stateless/02890_untuple_column_names.reference b/tests/queries/0_stateless/02890_untuple_column_names.reference index 388f974c45f..13a85c70138 100644 --- a/tests/queries/0_stateless/02890_untuple_column_names.reference +++ b/tests/queries/0_stateless/02890_untuple_column_names.reference @@ -57,6 +57,10 @@ t.1: 1 Row 1: ────── t.1: 1 +-- tuple() with enable_named_columns_in_function_tuple = 1 and allow_experimental_analyzer = 1 keeps the column names +Row 1: +────── +t.a: 1 -- thankfully JSONExtract() keeps them Row 1: ────── diff --git a/tests/queries/0_stateless/02890_untuple_column_names.sql b/tests/queries/0_stateless/02890_untuple_column_names.sql index ab6748cb54d..cd490ca3522 100644 --- a/tests/queries/0_stateless/02890_untuple_column_names.sql +++ b/tests/queries/0_stateless/02890_untuple_column_names.sql @@ -37,8 +37,11 @@ SELECT untuple(tuple(1)::Tuple(Int)), untuple(tuple(1)::Tuple(Int)) FORMAT Verti SELECT untuple(tuple(1)::Tuple(Int)), untuple(tuple(1)::Tuple(Int)) FORMAT Vertical SETTINGS allow_experimental_analyzer = 1; -- Bug: doesn't throw an exception SELECT '-- tuple() loses the column names (would be good to fix, see #36773)'; -SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 0; -SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 1; +SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 0, enable_named_columns_in_function_tuple = 0; +SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 1, enable_named_columns_in_function_tuple = 0; + +SELECT '-- tuple() with enable_named_columns_in_function_tuple = 1 and allow_experimental_analyzer = 1 keeps the column names'; +SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 1, enable_named_columns_in_function_tuple = 1; SELECT '-- thankfully JSONExtract() keeps them'; SELECT untuple(JSONExtract('{"key": "value"}', 'Tuple(key String)')) x FORMAT Vertical SETTINGS allow_experimental_analyzer = 0; diff --git a/tests/queries/0_stateless/02892_input_csv_cr_end_count_many_rows.sh b/tests/queries/0_stateless/02892_input_csv_cr_end_count_many_rows.sh index 42dde18de00..9f93396e368 100755 --- a/tests/queries/0_stateless/02892_input_csv_cr_end_count_many_rows.sh +++ b/tests/queries/0_stateless/02892_input_csv_cr_end_count_many_rows.sh @@ -6,11 +6,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') - cp "$CURDIR"/data_csv/1m_rows_cr_end_of_line.csv.xz $USER_FILES_PATH/ $CLICKHOUSE_CLIENT -q "SELECT count(1) from file('1m_rows_cr_end_of_line.csv.xz') settings input_format_csv_allow_cr_end_of_line=1, optimize_count_from_files=1" $CLICKHOUSE_CLIENT -q "SELECT count(1) from file('1m_rows_cr_end_of_line.csv.xz') settings input_format_csv_allow_cr_end_of_line=1, optimize_count_from_files=0" -rm $USER_FILES_PATH/1m_rows_cr_end_of_line.csv.xz \ No newline at end of file +rm $USER_FILES_PATH/1m_rows_cr_end_of_line.csv.xz diff --git a/tests/queries/0_stateless/02895_npy_format.reference b/tests/queries/0_stateless/02895_npy_format.reference index f9e77644a35..52972f0acbd 100644 --- a/tests/queries/0_stateless/02895_npy_format.reference +++ b/tests/queries/0_stateless/02895_npy_format.reference @@ -85,12 +85,12 @@ c [4,5,6] [[1,2],[3,4]] [[5,6],[7,8]] -0 -0 -0 -0 -0 -0 +1 +1 +1 +1 +1 +1 1 [2.199219,1.099609,3.300781] [4.25,3.34961,6.628906] diff --git a/tests/queries/0_stateless/02895_npy_format.sh b/tests/queries/0_stateless/02895_npy_format.sh index 9d05303a091..194b2bc1fe4 100755 --- a/tests/queries/0_stateless/02895_npy_format.sh +++ b/tests/queries/0_stateless/02895_npy_format.sh @@ -52,14 +52,14 @@ $CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/two_dim.npy', Npy, 'v $CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/three_dim.npy', Npy, 'value Array(Array(Int8))')" $CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Array(Float32)')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value UUID')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Tuple(UInt8)')" 2>&1 | grep -c "BAD_ARGUMENTS" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value UUID')" 2>&1 | grep -c "UNKNOWN_TYPE" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Tuple(UInt8)')" 2>&1 | grep -c "UNKNOWN_TYPE" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Int8')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_str.npy', Npy, 'value Int8')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_unicode.npy', Npy, 'value Float32')" 2>&1 | grep -c "BAD_ARGUMENTS" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Int8')" 2>&1 | grep -c "ILLEGAL_COLUMN" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_str.npy', Npy, 'value Int8')" 2>&1 | grep -c "ILLEGAL_COLUMN" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_unicode.npy', Npy, 'value Float32')" 2>&1 | grep -c "ILLEGAL_COLUMN" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/complex.npy')" 2>&1 | grep -c "BAD_ARGUMENTS" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/complex.npy')" 2>&1 | grep -c "CANNOT_EXTRACT_TABLE_STRUCTURE" $CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/float_16.npy')" diff --git a/tests/queries/0_stateless/02895_npy_output_format.sh b/tests/queries/0_stateless/02895_npy_output_format.sh index 934c80830c5..a364e447062 100755 --- a/tests/queries/0_stateless/02895_npy_output_format.sh +++ b/tests/queries/0_stateless/02895_npy_output_format.sh @@ -5,10 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY -q "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* -chmod 777 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +chmod 777 ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ ${CLICKHOUSE_CLIENT} -n -q --ignore-error " DROP DATABASE IF EXISTS npy_output_02895; @@ -33,43 +32,43 @@ ${CLICKHOUSE_CLIENT} -n -q --ignore-error " INSERT INTO npy_output_02895.data_types VALUES (1, 1, 1, 1, 1, 1, 1, 1, 0.1, 0.01, 'npy', 'npy'), (-1, -1, -1, -1, 0, 0, 0, 0, 0.2, 0.02, 'npy', 'npynpy'); - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy') SELECT i1 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy') SELECT i2 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy') SELECT i4 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy') SELECT i8 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy') SELECT u1 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy') SELECT u2 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy') SELECT u4 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy') SELECT u8 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy') SELECT f4 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy') SELECT f8 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy') SELECT fs FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy') SELECT s FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy') SELECT i1 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy') SELECT i2 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy') SELECT i4 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy') SELECT i8 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy') SELECT u1 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy') SELECT u2 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy') SELECT u4 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy') SELECT u8 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy') SELECT f4 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy') SELECT f8 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy') SELECT fs FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy') SELECT s FROM npy_output_02895.data_types; - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy'); SELECT '-- test nested data types --'; CREATE TABLE IF NOT EXISTS npy_output_02895.nested_data_types @@ -81,16 +80,16 @@ ${CLICKHOUSE_CLIENT} -n -q --ignore-error " INSERT INTO npy_output_02895.nested_data_types VALUES ([[[1], [2]], [[3], [4]]], [[0.1], [0.2]], ['a', 'bb']), ([[[1], [2]], [[3], [4]]], [[0.1], [0.2]], ['ccc', 'dddd']); - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy') SELECT i4 FROM npy_output_02895.nested_data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy') SELECT f8 FROM npy_output_02895.nested_data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy') SELECT s FROM npy_output_02895.nested_data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy') SELECT i4 FROM npy_output_02895.nested_data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy') SELECT f8 FROM npy_output_02895.nested_data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy') SELECT s FROM npy_output_02895.nested_data_types; - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy'); SELECT '-- test exceptions --'; CREATE TABLE IF NOT EXISTS npy_output_02895.exceptions @@ -115,4 +114,4 @@ ${CLICKHOUSE_CLIENT} -n -q --ignore-error " DROP DATABASE IF EXISTS npy_output_02895;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference index 5b6c14ca24f..4eb7e74446d 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference @@ -3,7 +3,7 @@ SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) O 2 2 2 2 3 3 3 33 \N \N \N \N -SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t2.x IS NULL) OR t1.y <=> t2.y ORDER BY t1.x NULLS LAST; 1 42 4 42 2 2 2 2 3 3 3 33 @@ -12,14 +12,14 @@ SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER 2 2 2 2 3 3 3 33 \N \N \N \N -SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; +SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND ((t1.x = t1.y) OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; 2 2 2 2 3 3 3 33 \N \N \N \N SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST; 2 2 2 2 \N \N \N \N -SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t1.y IS NULL AND t2.x IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t2.x IS NULL) OR (t1.y IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; 1 42 4 42 2 2 2 2 3 3 3 33 @@ -31,3 +31,21 @@ SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST; 1 1 1 +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; +2 2 2 2 +3 3 3 33 +\N \N \N \N +-- aliases defined in the join condition are valid +-- FIXME(@vdimir) broken query formatting for the following queries: +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; + +-- check for non-nullable columns for which `is null` is replaced with constant +SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; +2 2 2 2 +3 3 3 33 +-- +0 +0 +2 +2 diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index 5458370db8c..f7813e2a1b4 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -1,5 +1,7 @@ DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t1n; +DROP TABLE IF EXISTS t2n; CREATE TABLE t1 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog; CREATE TABLE t2 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog; @@ -7,24 +9,63 @@ CREATE TABLE t2 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog; INSERT INTO t1 VALUES (1,42), (2,2), (3,3), (NULL,NULL); INSERT INTO t2 VALUES (NULL,NULL), (2,2), (3,33), (4,42); +CREATE TABLE t1n (x Int64, y UInt64) ENGINE = TinyLog; +CREATE TABLE t2n (x Int64, y UInt64) ENGINE = TinyLog; + +INSERT INTO t1n VALUES (1,42), (2,2), (3,3); +INSERT INTO t2n VALUES (2,2), (3,33), (4,42); + SET allow_experimental_analyzer = 1; -- { echoOn } SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) ORDER BY t1.x NULLS LAST; -SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t2.x IS NULL) OR t1.y <=> t2.y ORDER BY t1.x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER BY t1.x; -SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; +SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND ((t1.x = t1.y) OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST; -SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t1.y IS NULL AND t2.x IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t2.x IS NULL) OR (t1.y IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) AND (t1.y == t2.y OR (t1.y IS NULL AND t2.y IS NULL)) AND COALESCE(t1.x, 0) != 2 ORDER BY t1.x NULLS LAST; SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST; + +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; + +-- aliases defined in the join condition are valid +-- FIXME(@vdimir) broken query formatting for the following queries: +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; + +-- check for non-nullable columns for which `is null` is replaced with constant +SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; + -- { echoOff } +SELECT '--'; + +-- IS NOT NULL and constants are optimized out +SELECT count() FROM ( EXPLAIN QUERY TREE + SELECT * FROM t1 JOIN t2 ON ( (t1.x = t2.x) AND (t1.x IS NOT NULL) AND true AND (t2.x IS NOT NULL) ) +) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%'; + +SELECT count() FROM ( EXPLAIN QUERY TREE + SELECT * FROM t1 JOIN t2 ON ( (t1.x = t2.x) AND true ) +) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%'; + +-- this is not optimized out +SELECT count() FROM ( EXPLAIN QUERY TREE + SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL) +) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%'; + +SELECT count() FROM ( EXPLAIN QUERY TREE + SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) +) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%'; + DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t1n; +DROP TABLE IF EXISTS t2n; diff --git a/tests/queries/0_stateless/02931_file_cluster.sh b/tests/queries/0_stateless/02931_file_cluster.sh index 8566e2ab08e..ebd3792e1dc 100755 --- a/tests/queries/0_stateless/02931_file_cluster.sh +++ b/tests/queries/0_stateless/02931_file_cluster.sh @@ -4,8 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - mkdir -p "${USER_FILES_PATH}"/"${CLICKHOUSE_TEST_UNIQUE_NAME}"/ for i in {1..10} diff --git a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh index ddad7a1904b..2e5a538007c 100755 --- a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh +++ b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage +# Tags: no-fasttest, no-parallel, no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) disk_name="s3_cache_02933" $CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" -config_path=/etc/clickhouse-server/config.d/storage_conf.xml +config_path=${CLICKHOUSE_CONFIG_DIR}/config.d/storage_conf.xml config_path_tmp=$config_path.tmp cat $config_path \ diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference index d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference @@ -1 +1,2 @@ 1 +1 diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index 8a6904b6bd7..15f169d880f 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -9,8 +9,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" + # Non-replicated engines are allowed ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test (id UInt64) ENGINE = MergeTree() ORDER BY id AS SELECT 1" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv (id UInt64) ENGINE = MergeTree() ORDER BY id POPULATE AS SELECT 1" + # Replicated storafes are forbidden -${CLICKHOUSE_CLIENT} --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" + +# But it is allowed with the special setting +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 + ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" diff --git a/tests/queries/0_stateless/02941_variant_type_1.reference b/tests/queries/0_stateless/02941_variant_type_1.reference index 8a6e77d4f6d..53e5a556821 100644 --- a/tests/queries/0_stateless/02941_variant_type_1.reference +++ b/tests/queries/0_stateless/02941_variant_type_1.reference @@ -91,42 +91,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -145,21 +145,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -256,42 +256,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -310,23 +310,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -421,42 +421,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -475,23 +475,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- MergeTree compact @@ -587,42 +587,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -641,21 +641,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -751,42 +751,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -805,21 +805,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -916,42 +916,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -970,23 +970,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test2 select @@ -1080,42 +1080,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1134,23 +1134,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -1245,42 +1245,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -1299,23 +1299,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- test3 select @@ -1409,42 +1409,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -1463,23 +1463,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- MergeTree wide @@ -1575,42 +1575,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1629,21 +1629,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -1739,42 +1739,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1793,21 +1793,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -1904,42 +1904,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1958,23 +1958,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test2 select @@ -2068,42 +2068,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -2122,23 +2122,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -2233,42 +2233,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -2287,23 +2287,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- test3 select @@ -2397,42 +2397,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -2451,22 +2451,22 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02941_variant_type_2.reference b/tests/queries/0_stateless/02941_variant_type_2.reference index 20a5176cb5e..1d9126aa230 100644 --- a/tests/queries/0_stateless/02941_variant_type_2.reference +++ b/tests/queries/0_stateless/02941_variant_type_2.reference @@ -6,9 +6,6 @@ test4 select 100000 100000 100000 -100000 -100000 -100000 MergeTree compact test4 insert test4 select @@ -17,18 +14,12 @@ test4 select 100000 100000 100000 -100000 -100000 -100000 test4 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 MergeTree wide test4 insert test4 select @@ -37,15 +28,9 @@ test4 select 100000 100000 100000 -100000 -100000 -100000 test4 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh index f43cd2bb0d6..8453bce98dc 100755 --- a/tests/queries/0_stateless/02941_variant_type_2.sh +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -33,13 +33,10 @@ select v.\`LowCardinality(String)\` from test format Null; select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); -select v.\`Array(UInt64)\`.size0 from test format Null; -select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" +select v.\`Array(UInt64)\`.size0 from test format Null;" } function run() diff --git a/tests/queries/0_stateless/02941_variant_type_3.reference b/tests/queries/0_stateless/02941_variant_type_3.reference index 1ccdb3acdff..d28aa7a594b 100644 --- a/tests/queries/0_stateless/02941_variant_type_3.reference +++ b/tests/queries/0_stateless/02941_variant_type_3.reference @@ -6,9 +6,6 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 MergeTree compact test5 insert test5 select @@ -17,18 +14,12 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 test5 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 MergeTree wide test5 insert test5 select @@ -37,15 +28,9 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 test5 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 diff --git a/tests/queries/0_stateless/02941_variant_type_3.sh b/tests/queries/0_stateless/02941_variant_type_3.sh index f4b2b304f56..990eb25b5be 100755 --- a/tests/queries/0_stateless/02941_variant_type_3.sh +++ b/tests/queries/0_stateless/02941_variant_type_3.sh @@ -35,13 +35,10 @@ select v.\`LowCardinality(String)\` from test format Null; select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); -select v.\`Array(UInt64)\`.size0 from test format Null; -select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" +select v.\`Array(UInt64)\`.size0 from test format Null;" } function run() diff --git a/tests/queries/0_stateless/02941_variant_type_4.reference b/tests/queries/0_stateless/02941_variant_type_4.reference index e13d5820343..d1630b04347 100644 --- a/tests/queries/0_stateless/02941_variant_type_4.reference +++ b/tests/queries/0_stateless/02941_variant_type_4.reference @@ -6,9 +6,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- MergeTree compact test6 insert @@ -18,9 +15,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- test6 select 1000000 @@ -28,9 +22,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- MergeTree wide test6 insert @@ -40,9 +31,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- test6 select 1000000 @@ -50,7 +38,4 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh index f9a16847864..b8f619694b0 100755 --- a/tests/queries/0_stateless/02941_variant_type_4.sh +++ b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -29,13 +29,10 @@ function test6_select() select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; - select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; - select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); - select v.\`Array(UInt64)\`.size0 from test format Null; - select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" + select v.\`Array(UInt64)\`.size0 from test format Null;" echo "-----------------------------------------------------------------------------------------------------------" } diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh index 2e344a6b6e5..cb099bb59ae 100755 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -24,7 +24,7 @@ $CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" $CLICKHOUSE_CLIENT --query "SELECT sum(size) FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" -config_path=/etc/clickhouse-server/config.d/storage_conf_02944.xml +config_path=${CLICKHOUSE_CONFIG_DIR}/config.d/storage_conf_02944.xml config_path_tmp=$config_path.tmp echo 'set max_size from 100 to 10' diff --git a/tests/queries/0_stateless/02944_variant_as_common_type.sql b/tests/queries/0_stateless/02944_variant_as_common_type.sql index 49ea5f2769c..b3b86427b06 100644 --- a/tests/queries/0_stateless/02944_variant_as_common_type.sql +++ b/tests/queries/0_stateless/02944_variant_as_common_type.sql @@ -71,4 +71,3 @@ select toTypeName(res), array([1, 2, 3], [[1, 2, 3]]) as res; select toTypeName(res), map('a', 1, 'b', 'str_1') as res; select toTypeName(res), map('a', 1, 'b', map('c', 2, 'd', 'str_1')) as res; select toTypeName(res), map('a', 1, 'b', [1, 2, 3], 'c', [[4, 5, 6]]) as res; - diff --git a/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh index a02bdd0a1d2..3d2fe5d664d 100755 --- a/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh +++ b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP DATABASE IF EXISTS 02950_database_for_ssd_cache_dictionary; CREATE DATABASE 02950_database_for_ssd_cache_dictionary; @@ -32,7 +30,7 @@ $CLICKHOUSE_CLIENT -n --query=" PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'source_table')) LIFETIME(MIN 1 MAX 1000) - LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 8192 PATH '$USER_FILES_PATH/0d')); + LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 8192 PATH '$CLICKHOUSE_USER_FILES/0d')); SELECT dictGetOrDefault('02950_database_for_ssd_cache_dictionary.ssd_cache_dictionary', ('v1', 'v2'), 0, (intDiv(1, id), intDiv(1, id))) FROM 02950_database_for_ssd_cache_dictionary.source_table; SELECT dictGetOrDefault('02950_database_for_ssd_cache_dictionary.ssd_cache_dictionary', 'v2', id+1, intDiv(NULL, id)) FROM 02950_database_for_ssd_cache_dictionary.source_table; diff --git a/tests/queries/0_stateless/02961_storage_config_volume_priority.sh b/tests/queries/0_stateless/02961_storage_config_volume_priority.sh index 4e085541a8d..145b921a750 100755 --- a/tests/queries/0_stateless/02961_storage_config_volume_priority.sh +++ b/tests/queries/0_stateless/02961_storage_config_volume_priority.sh @@ -15,7 +15,7 @@ WHERE policy_name = 'policy_02961' ORDER BY volume_priority ASC; " -config_path=/etc/clickhouse-server/config.d/storage_conf_02961.xml +config_path=${CLICKHOUSE_CONFIG_DIR}/config.d/storage_conf_02961.xml config_path_tmp=$config_path.tmp echo 'check non-unique values dont work' diff --git a/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh b/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh index b61be87411d..9ef271632d0 100755 --- a/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh +++ b/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh @@ -62,7 +62,7 @@ export -f sync_and_drop_replicas export -f optimize_thread export -f mutations_thread -TIMEOUT=60 +TIMEOUT=30 timeout $TIMEOUT bash -c insert_thread 2> /dev/null & timeout $TIMEOUT bash -c sync_and_drop_replicas 2> /dev/null & diff --git a/tests/queries/0_stateless/02967_prewhere_no_columns.reference b/tests/queries/0_stateless/02967_prewhere_no_columns.reference new file mode 100644 index 00000000000..df105254618 --- /dev/null +++ b/tests/queries/0_stateless/02967_prewhere_no_columns.reference @@ -0,0 +1,2 @@ +105 +105 diff --git a/tests/queries/0_stateless/02967_prewhere_no_columns.sql b/tests/queries/0_stateless/02967_prewhere_no_columns.sql new file mode 100644 index 00000000000..efcc952caa2 --- /dev/null +++ b/tests/queries/0_stateless/02967_prewhere_no_columns.sql @@ -0,0 +1,51 @@ +CREATE TABLE t_02967 +( + `key` Date, + `value` UInt16 +) +ENGINE = MergeTree +ORDER BY key +SETTINGS + index_granularity_bytes = 0 --8192 --, min_index_granularity_bytes = 2 + , index_granularity = 100 + , min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0 +-- +-- , min_bytes_for_wide_part = 2 +AS SELECT + number, + repeat(toString(number), 5) +FROM numbers(105.); + + + +-- Check with newly inserted data part. It's in-memory structured are filled at insert time. +SELECT + count(ignore(*)) +FROM t_02967 +PREWHERE CAST(ignore() + 1 as UInt8) +GROUP BY + ignore(65535, *), + ignore(255, 256, *) +SETTINGS + --send_logs_level='test', + max_threads=1; + + + +-- Reload part form disk to check that in-meory structures where properly serilaized-deserialized +DETACH TABLE t_02967; +ATTACH TABLE t_02967; + + +SELECT + count(ignore(*)) +FROM t_02967 +PREWHERE CAST(ignore() + 1 as UInt8) +GROUP BY + ignore(65535, *), + ignore(255, 256, *) +SETTINGS + --send_logs_level='test', + max_threads=1; + +DROP TABLE t_02967; diff --git a/tests/queries/0_stateless/02968_file_log_multiple_read.sh b/tests/queries/0_stateless/02968_file_log_multiple_read.sh index 199893a9428..d9bae05270a 100755 --- a/tests/queries/0_stateless/02968_file_log_multiple_read.sh +++ b/tests/queries/0_stateless/02968_file_log_multiple_read.sh @@ -4,12 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -logs_dir=${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME} +logs_dir=${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME} rm -rf ${logs_dir} diff --git a/tests/queries/0_stateless/02971_analyzer_remote_id.sh b/tests/queries/0_stateless/02971_analyzer_remote_id.sh index 463e4cc1f0c..ab3c5292529 100755 --- a/tests/queries/0_stateless/02971_analyzer_remote_id.sh +++ b/tests/queries/0_stateless/02971_analyzer_remote_id.sh @@ -1,15 +1,9 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS test_02971" -${CLICKHOUSE_CLIENT} --query="CREATE DATABASE test_02971" - -${CLICKHOUSE_CLIENT} --query="CREATE TABLE test_02971.x ENGINE = MergeTree() ORDER BY number AS SELECT * FROM numbers(2)" -${CLICKHOUSE_LOCAL} --query="SELECT count() FROM remote('127.0.0.{2,3}', 'test_02971.x') SETTINGS allow_experimental_analyzer = 1" 2>&1 \ +${CLICKHOUSE_CLIENT} --query="CREATE TABLE ${CLICKHOUSE_DATABASE}.x ENGINE = MergeTree() ORDER BY number AS SELECT * FROM numbers(2)" +${CLICKHOUSE_LOCAL} --query="SELECT count() FROM remote('127.0.0.{2,3}', '${CLICKHOUSE_DATABASE}.x') SETTINGS allow_experimental_analyzer = 1" 2>&1 \ | grep -av "ASan doesn't fully support makecontext/swapcontext functions" - -${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS test_02971" diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference new file mode 100644 index 00000000000..3389ea44074 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference @@ -0,0 +1,5 @@ +SELECT + __table1.`arr.size0` AS `length(arr)`, + __table1.`n.null` AS `isNull(n)` +FROM default.t_column_names AS __table1 +{"length(arr)":"3","isNull(n)":0} diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql new file mode 100644 index 00000000000..48e5232d18b --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t_column_names; + +CREATE TABLE t_column_names (arr Array(UInt64), n Nullable(String)) ENGINE = Memory; + +INSERT INTO t_column_names VALUES ([1, 2, 3], 'foo'); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), isNull(n) FROM t_column_names; +SELECT length(arr), isNull(n) FROM t_column_names FORMAT JSONEachRow; + +DROP TABLE t_column_names; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference new file mode 100644 index 00000000000..9488291c8ff --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference @@ -0,0 +1,12 @@ +SELECT __table1.`m.size0` AS `length(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +2 +1 +SELECT __table1.`m.size0` = 0 AS `empty(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +0 +0 +SELECT __table1.`m.size0` != 0 AS `notEmpty(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +1 +1 diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql new file mode 100644 index 00000000000..e8a752a82d5 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS t_func_to_subcolumns_map; + +CREATE TABLE t_func_to_subcolumns_map (id UInt64, m Map(String, UInt64)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_map VALUES (1, map('aaa', 1, 'bbb', 2)) (2, map('ccc', 3)); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(m) FROM t_func_to_subcolumns_map; +SELECT length(m) FROM t_func_to_subcolumns_map; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT empty(m) FROM t_func_to_subcolumns_map; +SELECT empty(m) FROM t_func_to_subcolumns_map; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT notEmpty(m) FROM t_func_to_subcolumns_map; +SELECT notEmpty(m) FROM t_func_to_subcolumns_map; + +DROP TABLE t_func_to_subcolumns_map; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference new file mode 100644 index 00000000000..04616738a15 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference @@ -0,0 +1,4 @@ +SELECT __table1.`v.String` AS `variantElement(v, \'String\')` +FROM default.t_func_to_subcolumns_variant AS __table1 +foo +\N diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql new file mode 100644 index 00000000000..511bcc44514 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS t_func_to_subcolumns_variant; + +SET allow_experimental_variant_type = 1; + +CREATE TABLE t_func_to_subcolumns_variant (id UInt64, v Variant(String, UInt64)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_variant VALUES (1, 'foo') (2, 111); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; +SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; + +DROP TABLE t_func_to_subcolumns_variant; diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index 14f28f1ba4a..6edac86be5b 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -4,8 +4,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# Data preparation step -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') UNIX_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" DOS_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" DATA_FILE_UNIX_ENDINGS="${USER_FILES_PATH:?}/${UNIX_ENDINGS}" diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference index 531163e1d84..3135f2d01e1 100644 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference @@ -3,28 +3,28 @@ data after ATTACH 1 Files before DETACH TABLE all_1_1_0 -backups/ordinary_default/data/ordinary_default/data/all_1_1_0: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data/all_1_1_0: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json Files after DETACH TABLE all_1_1_0 -backups/ordinary_default/data/ordinary_default/data/all_1_1_0: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data/all_1_1_0: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh index 12d08159012..d543f7195a9 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh @@ -49,11 +49,11 @@ path=$($CLICKHOUSE_CLIENT -q "SELECT replace(data_paths[1], 's3_plain', '') FROM path=${path%/} echo "Files before DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2 +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 $CLICKHOUSE_CLIENT -q "detach table data" echo "Files after DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2 +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 # metadata file is left $CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference index 1e191b719a5..a2dd196083e 100644 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference @@ -3,28 +3,28 @@ data after ATTACH 1 Files before DETACH TABLE all_X_X_X -backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json Files after DETACH TABLE all_X_X_X -backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh index b079e67a000..eec05c81344 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh @@ -55,14 +55,14 @@ path=${path%/} echo "Files before DETACH TABLE" # sed to match any part, since in case of fault injection part name may not be all_0_0_0 but all_1_1_0 -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' $CLICKHOUSE_CLIENT -nm -q " detach table data_read; detach table data_write; " echo "Files after DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' # metadata file is left $CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference index 72749c905a3..d00491fd7e5 100644 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference @@ -1 +1 @@ -1 1 1 +1 diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh index 263a4535c0e..84183606d48 100755 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh @@ -11,4 +11,4 @@ $CLICKHOUSE_CLIENT --query_id $query_id --log_query_threads 1 --query="select nu $CLICKHOUSE_CLIENT -q "system flush logs;" -$CLICKHOUSE_CLIENT -q "select count() > 0, (countIf(thread_name = 'AggregDestruct') as aggs) > 0, aggs > 1 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase();" +$CLICKHOUSE_CLIENT -q "select count() > 0 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase() and thread_name = 'AggregDestruct';" diff --git a/tests/queries/0_stateless/02982_changeDate.reference b/tests/queries/0_stateless/02982_changeDate.reference new file mode 100644 index 00000000000..4a7f093ca2b --- /dev/null +++ b/tests/queries/0_stateless/02982_changeDate.reference @@ -0,0 +1,169 @@ +Negative tests +changeYear +-- Date +2001-01-01 +1970-01-01 +1970-01-01 +2149-06-06 +-- Date32 +2001-01-01 +1900-01-01 +1900-01-01 +2299-12-31 +-- DateTime +2001-01-01 11:22:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +2106-02-07 07:28:15 +-- DateTime64 +2001-01-01 11:22:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +2299-12-31 23:59:59.9999 +changeMonth +-- Date +2000-01-01 +2000-02-01 +2000-12-01 +1970-01-01 +1970-01-01 +1970-01-01 +-- Date32 +2000-01-01 +2000-02-01 +2000-12-01 +1900-01-01 +1900-01-01 +1900-01-01 +-- DateTime +2000-01-01 11:22:33 +2000-02-01 11:22:33 +2000-12-01 11:22:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 11:22:33.4444 +2000-02-01 11:22:33.4444 +2000-12-01 11:22:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +changeDay +-- Date +2000-01-01 +2000-01-02 +2000-01-31 +1970-01-01 +1970-01-01 +1970-01-01 +-- Date32 +2000-01-01 +2000-01-02 +2000-01-31 +1900-01-01 +1900-01-01 +1900-01-01 +-- DateTime +2000-01-01 11:22:33 +2000-01-02 11:22:33 +2000-01-31 11:22:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 11:22:33.4444 +2000-01-02 11:22:33.4444 +2000-01-31 11:22:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +-- Special case: change to 29 Feb in a leap year +2000-02-29 +2000-02-29 +2000-02-29 11:22:33 +2000-02-29 11:22:33.4444 +changeHour +-- Date +2000-01-01 00:00:00 +2000-01-01 02:00:00 +2000-01-01 23:00:00 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- Date32 +2000-01-01 00:00:00.000 +2000-01-01 02:00:00.000 +2000-01-01 23:00:00.000 +1900-01-01 00:00:00.000 +1900-01-01 00:00:00.000 +-- DateTime +2000-01-01 00:22:33 +2000-01-01 02:22:33 +2000-01-01 23:22:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 00:22:33.4444 +2000-01-01 02:22:33.4444 +2000-01-01 23:22:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +-- With different timezone +1970-01-01 07:00:00 +1970-01-01 07:00:00 +changeMinute +-- Date +2000-01-01 00:00:00 +2000-01-01 00:02:00 +2000-01-01 00:59:00 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- Date32 +2000-01-01 00:00:00.000 +2000-01-01 00:02:00.000 +2000-01-01 00:59:00.000 +1900-01-01 00:00:00.000 +1900-01-01 00:00:00.000 +-- DateTime +2000-01-01 11:00:33 +2000-01-01 11:02:33 +2000-01-01 11:59:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 11:00:33.4444 +2000-01-01 11:02:33.4444 +2000-01-01 11:59:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +-- With different timezone +1970-01-01 07:00:00 +1970-01-01 07:00:00 +changeSecond +-- Date +2000-01-01 00:00:00 +2000-01-01 00:00:02 +2000-01-01 00:00:59 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- Date32 +2000-01-01 00:00:00.000 +2000-01-01 00:00:02.000 +2000-01-01 00:00:59.000 +1900-01-01 00:00:00.000 +1900-01-01 00:00:00.000 +-- DateTime +2000-01-01 11:22:00 +2000-01-01 11:22:02 +2000-01-01 11:22:59 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 11:22:00.4444 +2000-01-01 11:22:02.4444 +2000-01-01 11:22:59.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +-- With different timezone +1970-01-01 07:00:00 +1970-01-01 07:00:00 diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql new file mode 100644 index 00000000000..2bc9aa95569 --- /dev/null +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -0,0 +1,185 @@ +SELECT 'Negative tests'; +-- as changeYear, changeMonth, changeDay, changeMinute, changeSecond share the same implementation, just testing one of them +SELECT changeYear(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT changeYear(toDate('2000-01-01')); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT changeYear(toDate('2000-01-01'), 2000, 1); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT changeYear(1999, 2000); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT changeYear(toDate('2000-01-01'), 'abc'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT changeYear(toDate('2000-01-01'), 1.5); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- Disable timezone randomization +SET session_timezone='CET'; + +SELECT 'changeYear'; +SELECT '-- Date'; +SELECT changeYear(toDate('2000-01-01'), 2001); +SELECT changeYear(toDate('2000-01-01'), 1800); -- out-of-bounds +SELECT changeYear(toDate('2000-01-01'), -5000); -- out-of-bounds +SELECT changeYear(toDate('2000-01-01'), 2500); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeYear(toDate32('2000-01-01'), 2001); +SELECT changeYear(toDate32('2000-01-01'), 1800); -- out-of-bounds +SELECT changeYear(toDate32('2000-01-01'), -5000); -- out-of-bounds +SELECT changeYear(toDate32('2000-01-01'), 2500); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeYear(toDateTime('2000-01-01 11:22:33'), 2001); +SELECT changeYear(toDateTime('2000-01-01 11:22:33'), 1800); -- out-of-bounds +SELECT changeYear(toDateTime('2000-01-01 11:22:33'), -5000); -- out-of-bounds +SELECT changeYear(toDateTime('2000-01-01 11:22:33'), 2500); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), 2001); +SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), 1800); -- out-of-bounds +SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), -5000); -- out-of-bounds +SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), 2500); -- out-of-bounds + +SELECT 'changeMonth'; +SELECT '-- Date'; +SELECT changeMonth(toDate('2000-01-01'), 1); +SELECT changeMonth(toDate('2000-01-01'), 2); +SELECT changeMonth(toDate('2000-01-01'), 12); +SELECT changeMonth(toDate('2000-01-01'), 0); -- out-of-bounds +SELECT changeMonth(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeMonth(toDate('2000-01-01'), 13); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeMonth(toDate32('2000-01-01'), 1); +SELECT changeMonth(toDate32('2000-01-01'), 2); +SELECT changeMonth(toDate32('2000-01-01'), 12); +SELECT changeMonth(toDate32('2000-01-01'), 0); -- out-of-bounds +SELECT changeMonth(toDate32('2000-01-01'), -1); -- out-of-bounds +SELECT changeMonth(toDate32('2000-01-01'), 13); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 1); +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 12); +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 0); -- out-of-bounds +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 13); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 1); +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 12); +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); -- out-of-bounds +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 13); -- out-of-bounds + +SELECT 'changeDay'; +SELECT '-- Date'; +SELECT changeDay(toDate('2000-01-01'), 1); +SELECT changeDay(toDate('2000-01-01'), 2); +SELECT changeDay(toDate('2000-01-01'), 31); +SELECT changeDay(toDate('2000-01-01'), 0); -- out-of-bounds +SELECT changeDay(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeDay(toDate('2000-01-01'), 32); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeDay(toDate32('2000-01-01'), 1); +SELECT changeDay(toDate32('2000-01-01'), 2); +SELECT changeDay(toDate32('2000-01-01'), 31); +SELECT changeDay(toDate32('2000-01-01'), 0); -- out-of-bounds +SELECT changeDay(toDate32('2000-01-01'), -1); -- out-of-bounds +SELECT changeDay(toDate32('2000-01-01'), 32); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 1); +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 31); +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 0); -- out-of-bounds +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 32); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 1); +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 31); +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); -- out-of-bounds +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 32); -- out-of-bounds +SELECT '-- Special case: change to 29 Feb in a leap year'; +SELECT changeDay(toDate('2000-02-28'), 29); +SELECT changeDay(toDate32('2000-02-01'), 29); +SELECT changeDay(toDateTime('2000-02-01 11:22:33'), 29); +SELECT changeDay(toDateTime64('2000-02-01 11:22:33.4444', 4), 29); + +SELECT 'changeHour'; +SELECT '-- Date'; +SELECT changeHour(toDate('2000-01-01'), 0); +SELECT changeHour(toDate('2000-01-01'), 2); +SELECT changeHour(toDate('2000-01-01'), 23); +SELECT changeHour(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeHour(toDate('2000-01-01'), 24); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeHour(toDate32('2000-01-01'), 0); +SELECT changeHour(toDate32('2000-01-01'), 2); +SELECT changeHour(toDate32('2000-01-01'), 23); +SELECT changeHour(toDate32('2000-01-01'), -1); -- out-of-bounds +SELECT changeHour(toDate32('2000-01-01'), 24); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 0); +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 23); +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 24); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), 23); +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), 24); -- out-of-bounds +SELECT '-- With different timezone'; +SELECT changeHour(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeHour(toDate('2000-01-01'), 24) SETTINGS session_timezone = 'Asia/Novosibirsk'; + +SELECT 'changeMinute'; +SELECT '-- Date'; +SELECT changeMinute(toDate('2000-01-01'), 0); +SELECT changeMinute(toDate('2000-01-01'), 2); +SELECT changeMinute(toDate('2000-01-01'), 59); +SELECT changeMinute(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeMinute(toDate('2000-01-01'), 60); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeMinute(toDate32('2000-01-01'), 0); +SELECT changeMinute(toDate32('2000-01-01'), 2); +SELECT changeMinute(toDate32('2000-01-01'), 59); +SELECT changeMinute(toDate32('2000-01-01'), -1); -- out-of-bounds +SELECT changeMinute(toDate32('2000-01-01'), 60); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 0); +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 59); +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 60); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 59); +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 60); -- out-of-bounds +SELECT '-- With different timezone'; +SELECT changeMinute(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeMinute(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; + +SELECT 'changeSecond'; +SELECT '-- Date'; +SELECT changeSecond(toDate('2000-01-01'), 0); +SELECT changeSecond(toDate('2000-01-01'), 2); +SELECT changeSecond(toDate('2000-01-01'), 59); +SELECT changeSecond(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeSecond(toDate('2000-01-01'), 60); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeSecond(toDate32('2000-01-01'), 0); +SELECT changeSecond(toDate32('2000-01-01'), 2); +SELECT changeSecond(toDate32('2000-01-01'), 59); +SELECT changeSecond(toDate32('2000-01-01'), -1); -- out-of-bounds +SELECT changeSecond(toDate32('2000-01-01'), 60); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 0); +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 59); +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 60); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 59); +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 60); -- out-of-bounds +SELECT '-- With different timezone'; +SELECT changeSecond(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; diff --git a/tests/queries/0_stateless/02984_form_format.sh b/tests/queries/0_stateless/02984_form_format.sh index ce5feb60130..471b48e0f68 100755 --- a/tests/queries/0_stateless/02984_form_format.sh +++ b/tests/queries/0_stateless/02984_form_format.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Test setup -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME="data.tmp" FORM_DATA="${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/${FILE_NAME}" mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ @@ -30,4 +28,4 @@ touch $FORM_DATA echo -ne "c.e=ls7xfkpm&c.tti.m=raf&rt.start=navigation&rt.bmr=390%2C11%2C10&rt.tstart=1707076768666&rt.bstart=1707076769091&rt.blstart=1707076769056&rt.end=1707076769078&t_resp=296&t_page=116&t_done=412&t_other=boomerang%7C6%2Cboomr_fb%7C425%2Cboomr_ld%7C390%2Cboomr_lat%7C35&rt.tt=2685&rt.obo=0&pt.fcp=407&nt_nav_st=1707076768666&nt_dns_st=1707076768683&nt_dns_end=1707076768684&nt_con_st=1707076768684&nt_con_end=1707076768850&nt_req_st=1707076768850&nt_res_st=1707076768962&nt_res_end=1707076768962&nt_domloading=1707076769040&nt_domint=1707076769066&nt_domcontloaded_st=1707076769067&nt_domcontloaded_end=1707076769068&nt_domcomp=1707076769069&nt_load_st=1707076769069&nt_load_end=1707076769078&nt_unload_st=1707076769040&nt_unload_end=1707076769041&nt_ssl_st=1707076768788&nt_enc_size=3209&nt_dec_size=10093&nt_trn_size=3940&nt_protocol=h2&nt_red_cnt=0&nt_nav_type=1&restiming=%7B%22https%3A%2F%2Fwww.basicrum.com%2F%22%3A%7B%22publications%2F%22%3A%226%2C88%2C88%2C54%2C54%2C3e%2Ci%2Ci%2Ch*12h5%2Ckb%2C5b8%22%2C%22assets%2Fjs%2F%22%3A%7B%22just-the-docs.js%22%3A%223am%2Ce%2Ce*12pc%2C_%2C8oj*20%22%2C%22boomerang-1.737.60.cutting-edge.min.js%22%3A%222au%2Cb%2Ca*1pu3%2C_%2C1m19*21*42%22%2C%22vendor%2Flunr.min.js%22%3A%223am%2Cd%2C8*16t2%2C_%2Cfym*20%22%7D%7D%7D&u=https%3A%2F%2Fwww.basicrum.com%2Fpublications%2F&r=https%3A%2F%2Fwww.basicrum.com%2Fcost-analyses%2F&v=1.737.60&sv=14&sm=p&rt.si=dd0c542f-7adf-4310-830a-6c0a3d157c90-s8cjr1&rt.ss=1707075325294&rt.sl=4&vis.st=visible&ua.plt=Linux%20x86_64&ua.vnd=&pid=8fftz949&n=1&c.t.fps=07*4*65*j*61&c.t.busy=2*4*0034&c.tti.vr=408&c.tti=408&c.b=2&c.f=60&c.f.d=2511&c.f.m=1&c.f.s=ls7xfl1h&dom.res=5&dom.doms=1&mem.lsln=0&mem.ssln=0&mem.lssz=2&mem.sssz=2&scr.xy=1920x1200&scr.bpp=24%2F24&scr.orn=0%2Flandscape-primary&cpu.cnc=16&dom.ln=114&dom.sz=10438&dom.ck=157&dom.img=0&dom.script=6&dom.script.ext=3&dom.iframe=0&dom.link=4&dom.link.css=1&sb=1" > $FORM_DATA $CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FORM_DATA', Form) FORMAT Vertical" -rm $FORM_DATA \ No newline at end of file +rm $FORM_DATA diff --git a/tests/queries/0_stateless/02993_lazy_index_loading.reference b/tests/queries/0_stateless/02993_lazy_index_loading.reference index 5bc329ae4eb..08f07a92815 100644 --- a/tests/queries/0_stateless/02993_lazy_index_loading.reference +++ b/tests/queries/0_stateless/02993_lazy_index_loading.reference @@ -1,4 +1,4 @@ -100000000 140000000 +100000000 100000000 0 0 1 100000000 100000000 diff --git a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference new file mode 100644 index 00000000000..3051c199363 --- /dev/null +++ b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference @@ -0,0 +1,25 @@ +3 +2 +SELECT __table1.`arr.size0` AS `length(arr)` +FROM default.t_length_1 AS __table1 +WHERE __table1.`arr.size0` IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 +) +2 +SELECT __table1.`arr.size0` AS `length(arr)` +FROM default.t_length_1 AS __table1 +WHERE __table1.`arr.size0` IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 + FINAL +) +2 +SELECT length(__table1.arr) AS `length(arr)` +FROM default.t_length_1 AS __table1 +FINAL +WHERE length(__table1.arr) IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 + FINAL +) diff --git a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql new file mode 100644 index 00000000000..3fe29139c5f --- /dev/null +++ b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql @@ -0,0 +1,24 @@ +DROP TABLE IF EXISTS t_length_1; +DROP TABLE IF EXISTS t_length_2; + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; +SET optimize_on_insert = 0; + +CREATE TABLE t_length_1 (id UInt64, arr Array(UInt64)) ENGINE = ReplacingMergeTree ORDER BY id; +CREATE TABLE t_length_2 (id UInt64, arr_length UInt64) ENGINE = ReplacingMergeTree ORDER BY id; + +INSERT INTO t_length_1 VALUES (1, [1, 2, 3]), (2, [4, 5]); +INSERT INTO t_length_2 VALUES (1, 3), (1, 2), (2, 2); + +SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2); + +SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); + +SELECT length(arr) FROM t_length_1 FINAL WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 FINAL WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); + +DROP TABLE t_length_1; +DROP TABLE t_length_2; diff --git a/tests/queries/0_stateless/03008_local_plain_rewritable.sh b/tests/queries/0_stateless/03008_local_plain_rewritable.sh index 5fac964a219..d51e180efc9 100755 --- a/tests/queries/0_stateless/03008_local_plain_rewritable.sh +++ b/tests/queries/0_stateless/03008_local_plain_rewritable.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-random-settings, no-s3-storage, no-replicated-database, no-shared-merge-tree +# Tags: no-random-settings, no-object-storage, no-replicated-database, no-shared-merge-tree # Tag no-random-settings: enable after root causing flakiness CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/03030_system_flush_distributed_settings.reference b/tests/queries/0_stateless/03030_system_flush_distributed_settings.reference index 5caff40c4a0..3a05c8b3ee8 100644 --- a/tests/queries/0_stateless/03030_system_flush_distributed_settings.reference +++ b/tests/queries/0_stateless/03030_system_flush_distributed_settings.reference @@ -1 +1 @@ -10000 +30000 diff --git a/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql b/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql index ac64135b593..fac673a4fe4 100644 --- a/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql +++ b/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql @@ -6,15 +6,17 @@ drop table if exists dist_out; create table ephemeral (key Int, value Int) engine=Null(); create table dist_in as ephemeral engine=Distributed(test_shard_localhost, currentDatabase(), ephemeral, key) settings background_insert_batch=1; -create table data (key Int, uniq_values Int) engine=Memory(); -create materialized view mv to data as select key, uniqExact(value) uniq_values from ephemeral group by key; +create table data (key Int, uniq_values Int) engine=TinyLog(); +create materialized view mv to data as select key, uniqExact(value::String) uniq_values from ephemeral group by key; system stop distributed sends dist_in; create table dist_out as data engine=Distributed(test_shard_localhost, currentDatabase(), data); set prefer_localhost_replica=0; SET optimize_trivial_insert_select = 1; -insert into dist_in select number/100, number from system.numbers limit 1e6 settings max_memory_usage='20Mi'; +-- due to pushing to MV with aggregation the query needs ~300MiB +-- but it will be done in background via "system flush distributed" +insert into dist_in select number/100, number from system.numbers limit 3e6 settings max_block_size=3e6, max_memory_usage='100Mi'; system flush distributed dist_in; -- { serverError MEMORY_LIMIT_EXCEEDED } system flush distributed dist_in settings max_memory_usage=0; select count() from dist_out; diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference new file mode 100644 index 00000000000..202e4557a33 --- /dev/null +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference @@ -0,0 +1,2 @@ +query +String diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql new file mode 100644 index 00000000000..b26096f7f0e --- /dev/null +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql @@ -0,0 +1,18 @@ + +SELECT * FROM fuzzQuery('SELECT 1', 500, 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes; + +SELECT * FROM fuzzQuery('SELECT * +FROM ( + SELECT + ([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id, + count() + FROM numbers(3) + GROUP BY item_id WITH TOTALS +) AS l FULL JOIN ( + SELECT + ([toString((number % 2) * 2)] :: Array(String)) AS item_id + FROM numbers(3) +) AS r +ON l.item_id = r.item_id +ORDER BY 1,2,3; +', 500, 8956) LIMIT 10 FORMAT NULL; diff --git a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh index 526c4f84030..cba5317fcfa 100755 --- a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh +++ b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -18,7 +18,7 @@ $CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" prev_max_size=$($CLICKHOUSE_CLIENT --query "SELECT max_size FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name'") $CLICKHOUSE_CLIENT --query "SELECT current_size > 0 FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" -config_path=/etc/clickhouse-server/config.d/storage_conf.xml +config_path=${CLICKHOUSE_CONFIG_DIR}/config.d/storage_conf.xml new_max_size=$($CLICKHOUSE_CLIENT --query "SELECT divide(max_size, 2) FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name'") sed -i "s|$prev_max_size<\/max_size>|$new_max_size<\/max_size>|" $config_path diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.reference b/tests/queries/0_stateless/03036_reading_s3_archives.reference index 36ced212a1b..eacf16d0295 100644 --- a/tests/queries/0_stateless/03036_reading_s3_archives.reference +++ b/tests/queries/0_stateless/03036_reading_s3_archives.reference @@ -1,52 +1,52 @@ -1 Str1 example1.csv test/03036_archive1.zip::example1.csv -2 Str2 example1.csv test/03036_archive1.zip::example1.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.zip::example3.csv -6 Str6 example3.csv test/03036_archive2.zip::example3.csv -3 Str3 example2.csv test/03036_archive1.zip::example2.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive1.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -1 Str1 example1.csv test/03036_archive1.zip::example1.csv -2 Str2 example1.csv test/03036_archive1.zip::example1.csv -3 Str3 example2.csv test/03036_archive1.zip::example2.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive1.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.zip::example3.csv -6 Str6 example3.csv test/03036_archive2.zip::example3.csv -1 Str1 example1.csv test/03036_archive1.tar::example1.csv -2 Str2 example1.csv test/03036_archive1.tar::example1.csv -7 Str7 example4.csv test/03036_archive1.tar::example4.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive1.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -9 Str9 example5.csv test/03036_archive2.tar::example5.csv -10 Str10 example5.csv test/03036_archive2.tar::example5.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -11 Str11 example6.csv test/03036_archive3.tar.gz::example6.csv -12 Str12 example6.csv test/03036_archive3.tar.gz::example6.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -9 Str9 example5.csv test/03036_archive2.tar::example5.csv -10 Str10 example5.csv test/03036_archive2.tar::example5.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -13 Str13 example7.csv test/03036_compressed_file_archive.zip::example7.csv -14 Str14 example7.csv test/03036_compressed_file_archive.zip::example7.csv +1 Str1 25 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 25 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 25 example3.csv test/03036_archive2.zip::example3.csv +3 Str3 25 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +1 Str1 25 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 25 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 25 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 25 example3.csv test/03036_archive2.zip::example3.csv +1 Str1 25 example1.csv test/03036_archive1.tar::example1.csv +2 Str2 25 example1.csv test/03036_archive1.tar::example1.csv +7 Str7 25 example4.csv test/03036_archive1.tar::example4.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive1.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 27 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 27 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +11 Str11 29 example6.csv test/03036_archive3.tar.gz::example6.csv +12 Str12 29 example6.csv test/03036_archive3.tar.gz::example6.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 27 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 27 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +13 Str13 57 example7.csv test/03036_compressed_file_archive.zip::example7.csv +14 Str14 57 example7.csv test/03036_compressed_file_archive.zip::example7.csv diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.sql b/tests/queries/0_stateless/03036_reading_s3_archives.sql index 00d7cc25e1a..43bda4ee704 100644 --- a/tests/queries/0_stateless/03036_reading_s3_archives.sql +++ b/tests/queries/0_stateless/03036_reading_s3_archives.sql @@ -1,22 +1,22 @@ -- Tags: no-fasttest -- Tag no-fasttest: Depends on AWS -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.zip :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example2.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example*') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.tar :: example1.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar :: example4.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.tar :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar.gz :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv') ORDER BY (id, _file, _path); -select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } -select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent{2..3}.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive2.zip :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example2.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example*') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive1.tar :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar :: example4.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive2.tar :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar.gz :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv') ORDER BY (id, _file, _path); +select id, data, _size, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +select id, data, _size, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent{2..3}.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } CREATE TABLE table_zip22 Engine S3(s3_conn, filename='03036_archive2.zip :: example2.csv'); -select id, data, _file, _path from table_zip22 ORDER BY (id, _file, _path); +select id, data, _size, _file, _path from table_zip22 ORDER BY (id, _file, _path); CREATE table table_tar2star Engine S3(s3_conn, filename='03036_archive2.tar :: example*.csv'); -SELECT id, data, _file, _path FROM table_tar2star ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM table_tar2star ORDER BY (id, _file, _path); CREATE table table_tarstarglobs Engine S3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv'); -SELECT id, data, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); CREATE table table_noexist Engine s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError UNKNOWN_STORAGE } -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh index b82ddb3813e..5d8eac082cf 100755 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" function test() diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference index ca98ec0963c..a9c785d1e48 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference @@ -2,525 +2,525 @@ Memory initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 4 UInt64 7 String 8 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 5 UInt64 8 String 9 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 1 Date 5 UInt64 8 String 9 None -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N \N 3 \N \N -4 4 4 \N \N \N 4 \N \N -5 5 5 \N \N \N 5 \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N \N 12 \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 \N 12 \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 1 Date 5 UInt64 8 String 12 None -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N \N 3 \N \N -4 4 4 \N \N \N 4 \N \N -5 5 5 \N \N \N 5 \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N \N 12 \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 \N 12 \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 MergeTree compact initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 1 UInt64 9 None 12 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 1 Date 1 UInt64 9 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 1 Date 1 UInt64 12 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 MergeTree wide initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 1 UInt64 9 None 12 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 1 Date 1 UInt64 9 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 1 Date 1 UInt64 12 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference index 18a181464e9..f7c00bd8c44 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference @@ -2,181 +2,181 @@ MergeTree compact initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter rename column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert nested dynamic 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] alter rename column 2 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] MergeTree wide initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter rename column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert nested dynamic 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] alter rename column 2 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.reference b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference index b1ea186a917..0dab4ea0d20 100644 --- a/tests/queries/0_stateless/03041_dynamic_type_check_table.reference +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference @@ -2,55 +2,55 @@ MergeTree compact initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 check table 1 MergeTree wide initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 check table 1 diff --git a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference deleted file mode 100644 index 72749c905a3..00000000000 --- a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference +++ /dev/null @@ -1 +0,0 @@ -1 1 1 diff --git a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql deleted file mode 100644 index de471c1a091..00000000000 --- a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql +++ /dev/null @@ -1,17 +0,0 @@ --- https://github.com/ClickHouse/ClickHouse/issues/22627 -SET allow_experimental_analyzer=1; -WITH - x AS - ( - SELECT 1 AS a - ), - xx AS - ( - SELECT * - FROM x - , x AS x1 - , x AS x2 - ) -SELECT * -FROM xx -WHERE a = 1; diff --git a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference index 3ac6127fb21..2d33f7f6683 100644 --- a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference +++ b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference @@ -1,8 +1,8 @@ -100000000 140000000 -100000000 140000000 -100000000 140000000 +100000000 100000000 +100000000 100000000 +100000000 100000000 0 0 -100000000 140000000 +100000000 100000000 0 0 0 0 1 diff --git a/tests/queries/0_stateless/03128_system_unload_primary_key.reference b/tests/queries/0_stateless/03128_system_unload_primary_key.reference index c7b40ae5b06..2646dc7247f 100644 --- a/tests/queries/0_stateless/03128_system_unload_primary_key.reference +++ b/tests/queries/0_stateless/03128_system_unload_primary_key.reference @@ -1,4 +1,4 @@ -100000000 140000000 -100000000 140000000 +100000000 100000000 +100000000 100000000 0 0 0 0 diff --git a/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh b/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh index ea7bb8f7ad0..c27dfffcfc2 100755 --- a/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh +++ b/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh @@ -85,7 +85,7 @@ export -f optimize_thread; export -f insert_thread; -TIMEOUT=30 +TIMEOUT=20 # Sometimes we detach and attach tables timeout $TIMEOUT bash -c alter_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/03153_format_regexp_usability.sh b/tests/queries/0_stateless/03153_format_regexp_usability.sh index 03bed10dd17..561de3be893 100755 --- a/tests/queries/0_stateless/03153_format_regexp_usability.sh +++ b/tests/queries/0_stateless/03153_format_regexp_usability.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-ordinary-database, long +# Tags: no-fasttest, no-ordinary-database, long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index e69de29bb2d..c5a6cbab0bc 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -0,0 +1,2 @@ +1231 John 33 +8888 Alice 50 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index cd29fae8fd7..b189388e356 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -5,7 +5,8 @@ CREATE TABLE users ( uid Int16, name String, age Int16, - projection p1 (select count(), age group by age) + projection p1 (select count(), age group by age), + projection p2 (select age, name group by age, name) ) ENGINE = MergeTree order by uid; INSERT INTO users VALUES (1231, 'John', 33); @@ -13,3 +14,18 @@ INSERT INTO users VALUES (6666, 'Ksenia', 48); INSERT INTO users VALUES (8888, 'Alice', 50); DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } + +DELETE FROM users WHERE uid = 8888 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } + +DELETE FROM users WHERE uid = 6666 SETTINGS lightweight_mutation_projection_mode = 'drop'; + +-- expecting no projection +SELECT + name, + `table` +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users'); + +SELECT * FROM users ORDER BY uid; + +DROP TABLE users; diff --git a/tests/queries/0_stateless/03164_materialize_statistics.sql b/tests/queries/0_stateless/03164_materialize_statistics.sql deleted file mode 100644 index 43c5724dd59..00000000000 --- a/tests/queries/0_stateless/03164_materialize_statistics.sql +++ /dev/null @@ -1,49 +0,0 @@ -DROP TABLE IF EXISTS t_statistics_materialize; - -SET allow_experimental_analyzer = 1; -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET materialize_statistics_on_insert = 0; - -CREATE TABLE t_statistics_materialize -( - a Int64 STATISTICS(tdigest), - b Int16 STATISTICS(tdigest), -) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. - -INSERT INTO t_statistics_materialize SELECT number, -number FROM system.numbers LIMIT 10000; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; - -OPTIMIZE TABLE t_statistics_materialize FINAL; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; - -TRUNCATE TABLE t_statistics_materialize; -SET mutations_sync = 2; - -INSERT INTO t_statistics_materialize SELECT number, -number FROM system.numbers LIMIT 10000; -ALTER TABLE t_statistics_materialize MATERIALIZE STATISTICS a, b; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; - -DROP TABLE t_statistics_materialize; - -SYSTEM FLUSH LOGS; - -SELECT log_comment, message FROM system.text_log JOIN -( - SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log - WHERE current_database = currentDatabase() - AND query LIKE 'SELECT count(*) FROM t_statistics_materialize%' - AND type = 'QueryFinish' -) AS query_log USING (query_id) -WHERE message LIKE '%moved to PREWHERE%' -ORDER BY event_time_microseconds; - -SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) -FROM system.query_log -WHERE current_database = currentDatabase() - AND query LIKE 'INSERT INTO t_statistics_materialize SELECT%' - AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/03165_round_scale_as_column.reference b/tests/queries/0_stateless/03165_round_scale_as_column.reference index 9ad25ed466a..e0c9b6959ee 100644 --- a/tests/queries/0_stateless/03165_round_scale_as_column.reference +++ b/tests/queries/0_stateless/03165_round_scale_as_column.reference @@ -2162,4 +2162,17 @@ CHECKPOINT2 10 1.6275 1.6275 1.6275 1.6275 1 1 +3 +3.1 +3.14 +3.142 +3.1416 +3.14159 +3.141593 +3.1415927 +3.14159265 +3.141592654 +42 +42.4 +42.42 1 diff --git a/tests/queries/0_stateless/03165_round_scale_as_column.sql b/tests/queries/0_stateless/03165_round_scale_as_column.sql index 229f705808d..adae36564b8 100644 --- a/tests/queries/0_stateless/03165_round_scale_as_column.sql +++ b/tests/queries/0_stateless/03165_round_scale_as_column.sql @@ -118,6 +118,7 @@ DROP TABLE tab; SELECT round(1, 1); SELECT round(materialize(1), materialize(1)); -SELECT round(1, materialize(1)); --{serverError ILLEGAL_COLUMN} +SELECT round(pi(), number) FROM numbers(10); +SELECT round(toDecimal32(42.42, 2), number) from numbers(3); SELECT round(materialize(1), 1); SELECT materialize(10.1) AS x, ceil(x, toUInt256(123)); --{serverError ILLEGAL_TYPE_OF_ARGUMENT} diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.reference b/tests/queries/0_stateless/03167_base64_url_functions_sh.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.sh b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh new file mode 100755 index 00000000000..57060b8c525 --- /dev/null +++ b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh @@ -0,0 +1,166 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# shellcheck disable=SC2155 + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +urls=( + "http://www.example.com" + "https://secure.example.com" + "http://example.com" + "https://www.example.org" + "https://subdomain.example.com" + "http://sub.sub.example.com" + "http://192.168.1.1" + "https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]" + "http://example.com:8080" + "https://example.com:443" + "http://example.com/path/to/page.html" + "https://example.com/path/with/trailing/slash/" + "http://example.com/search?q=query&lang=en" + "https://example.com/path?param1=value1¶m2=value2" + "http://example.com/page.html#section1" + "https://example.com/document.pdf#page=10" + "http://user:password@example.com" + "https://user@example.com" + "https://user:pass@sub.example.com:8080/path/page.html?query=123#fragment" + "http://example.com/path%20with%20spaces" + "https://example.com/search?q=encode+this" + "http://例子.测试" + "https://mañana.com" + "http://example.com/%E2%82%AC" + "data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==" + "file:///C:/path/to/file.txt" + "file:///home/user/document.pdf" + "ftp://ftp.example.com/pub/file.zip" + "ftps://secure-ftp.example.com/private/doc.pdf" + "mailto:user@example.com" + "mailto:user@example.com?subject=Hello&body=How%20are%20you" + "git://github.com/user/repo.git" + "ssh://user@host.xz:port/path/to/repo.git" + "https://example.com/path(1)/[2]/{3}" + "http://example.com/path;param?query,value" + "" + "http://" + "example.com" + "http:" + "//" + "?query=value" + "#fragment" + "http://?#" + "http://xn--bcher-kva.ch" + "https://xn--bcher-kva.xn--tckwe/xn--8ws00zhy3a/%E6%B8%AC%E8%A9%A6.php?xn--o39an51a5phao35a=xn--mgbh0fb&xn--fiq228c5hs=test" + "https://xn--3e0b707e.xn--79-8kcre8v3a/%ED%85%8C%EC%8A%A4%ED%8A%B8/%ED%8C%8C%EC%9D%BC.jsp?xn--i1b6b1a6a2e=xn--9t4b11yi5a&xn--3e0b707e=xn--80aaa1cbgbm" + "https://example.com/path?param=value&special=!@#$%^&*()" + + "http://example.com/path/with/~tilde" + "https://example.com/path/with/\`backtick\`" + + "https://example.com/path?param1=value1¶m2=value2¶m3=value3#section1#section2" + "http://example.com/page?q1=v1&q2=v2#frag1#frag2#frag3" + + "https://example.com/☃/snowman" + "http://example.com/path/⽇本語" + "https://example.com/ü/ñ/path?q=ç" + + "https://example.com/path/to/very/long/url/that/exceeds/two/hundred/and/fifty/five/characters/lorem/ipsum/dolor/sit/amet/consectetur/adipiscing/elit/sed/do/eiusmod/tempor/incididunt/ut/labore/et/dolore/magna/aliqua/ut/enim/ad/minim/veniam/quis/nostrud/exercitation/ullamco/laboris/nisi/ut/aliquip/ex/ea/commodo/consequat" + + "https://example.com//path///to//file" + "http://example.com/path?param1=value1&¶m2=value2&&¶m3=value3" + + "http://example.com/%70%61%74%68?%70%61%72%61%6d=%76%61%6c%75%65#%66%72%61%67%6d%65%6e%74" + + "HtTpS://ExAmPlE.cOm/PaTh" + "http://EXAMPLE.COM/PATH" + + "http://127.0.0.1:8080/path" + "https://[::1]/path" + "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080/path" + + "http://example.com:65535/path" + "https://example.com:0/path" + + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAACklEQVR4nGMAAQAABQABDQottAAAAABJRU5ErkJggg==" + + "https://user:password@example.com:8080/path?query=value#fragment" + "ftp://anonymous:password@ftp.example.com/pub/" + + "http://example.com/path%20with%20spaces" + "https://example.com/search?q=query%20with%20spaces" + + "https://www.mañana.com/path" + "http://例子.测试/path" + "https://рм.рф/path" + + "https://user:pass@sub.example.com:8080/p/a/t/h?query=123&key=value#fragid1" + + "jdbc:mysql://localhost:3306/database" + "market://details?id=com.example.app" + "tel:+1-816-555-1212" + "sms:+18165551212" + + "http://[1080:0:0:0:8:800:200C:417A]/index.html" + "https://[2001:db8::1428:57ab]:8080/path" + + "http://.." + "http://../" + "http://??" + "http://??/" + "http:///a" + "http://example.com??" + "http://example.com??/" + "foo://example.com:8042/over/there?name=ferret#nose" + "//example.com/path" +) + + +base64URLEncode() { + echo -n "$1" | base64 -w0 | tr '+/' '-_' | tr -d '=' +} + +base64URLDecode() { + local len=$((${#1} % 4)) + local result="$1" + if [ $len -eq 2 ]; then result="$1"'==' + elif [ $len -eq 3 ]; then result="$1"'=' + fi + echo "$result" | tr '_-' '/+' | base64 -w0 -d +} + +test() { + local input="$1" + local encode_ch=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLEncode('$input')") + local encode_gold=$(base64URLEncode $input) + + local decode_ch=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode('$encode_gold')") + local decode_gold=$(base64URLDecode $encode_gold) + + if [ "$encode_ch" != "$encode_gold" ]; then + echo "Input: $input" + echo "Expected: $encode_gold" + echo "Got: $encode_ch" + fi + + if [ "$decode_ch" != "$input" ] || [ "$decode_ch" != "$decode_gold" ]; then + echo "Input: $input" + echo "Decode gold: $decode_gold" + echo "Got: $decode_ch" + fi +} + + +for url in "${urls[@]}"; do + test "$url" +done + +# special case for ' +decode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode(base64URLEncode('http://example.com/!$&\'()*+,;=:@/path'))") +if [ "$decode" != "http://example.com/!$&\'()*+,;=:@/path" ]; then + echo "Special case fail" + echo "Got: $decode" +fi diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_1.reference b/tests/queries/0_stateless/03168_read_in_order_buffering_1.reference new file mode 100644 index 00000000000..306885a0974 --- /dev/null +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_1.reference @@ -0,0 +1,6 @@ +1 +0 +1 +0 +0 +0 diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql b/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql new file mode 100644 index 00000000000..75025dcadc8 --- /dev/null +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql @@ -0,0 +1,45 @@ +DROP TABLE IF EXISTS t_read_in_order_1; + +CREATE TABLE t_read_in_order_1 (id UInt64, v UInt64) +ENGINE = MergeTree ORDER BY id +SETTINGS index_granularity = 1024, index_granularity_bytes = '10M'; + +INSERT INTO t_read_in_order_1 SELECT number, number FROM numbers(1000000); + +SET max_threads = 8; +SET optimize_read_in_order = 1; +SET read_in_order_use_buffering = 1; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 WHERE v % 10 = 0 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +SET read_in_order_use_buffering = 0; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 WHERE v % 10 = 0 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +DROP TABLE t_read_in_order_1; diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_2.reference b/tests/queries/0_stateless/03168_read_in_order_buffering_2.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql new file mode 100644 index 00000000000..1d3a75412e0 --- /dev/null +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql @@ -0,0 +1,17 @@ +-- Tags: long, no-random-settings, no-tsan, no-asan, no-msan, no-s3-storage + +DROP TABLE IF EXISTS t_read_in_order_2; + +CREATE TABLE t_read_in_order_2 (id UInt64, v UInt64) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_read_in_order_2 SELECT number, number FROM numbers(10000000); +OPTIMIZE TABLE t_read_in_order_2 FINAL; + +SET optimize_read_in_order = 1; +SET max_threads = 4; +SET read_in_order_use_buffering = 1; +SET max_memory_usage = '100M'; + +SELECT * FROM t_read_in_order_2 ORDER BY id FORMAT Null; + +DROP TABLE t_read_in_order_2; diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference new file mode 100644 index 00000000000..1fc6683620c --- /dev/null +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference @@ -0,0 +1,9 @@ +1 +2 1 +3 0 +0 450 +1 460 +2 470 +3 480 +4 490 +\N 4950 diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql new file mode 100644 index 00000000000..f10019a78dd --- /dev/null +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql @@ -0,0 +1,49 @@ +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS t_func_to_subcolumns_map_2; + +CREATE TABLE t_func_to_subcolumns_map_2 (id UInt64, m Map(String, UInt64)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_map_2 VALUES (1, map('aaa', 1, 'bbb', 2)) (2, map('ccc', 3)); + +SELECT sum(mapContains(m, toNullable('aaa'))) FROM t_func_to_subcolumns_map_2; + +DROP TABLE t_func_to_subcolumns_map_2; + +DROP TABLE IF EXISTS t_func_to_subcolumns_join; + +CREATE TABLE t_func_to_subcolumns_join (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_func_to_subcolumns_join VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); + +SET join_use_nulls = 1; + +SELECT + id, + right.n IS NULL +FROM t_func_to_subcolumns_join AS left +FULL OUTER JOIN +( + SELECT + 1 AS id, + 'qqq' AS n + UNION ALL + SELECT + 3 AS id, + 'www' +) AS right USING (id) +WHERE empty(arr); + +DROP TABLE t_func_to_subcolumns_join; + +DROP TABLE IF EXISTS t_func_to_subcolumns_use_nulls; + +CREATE TABLE t_func_to_subcolumns_use_nulls (arr Array(UInt64), v UInt64) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_func_to_subcolumns_use_nulls SELECT range(number % 10), number FROM numbers(100); + +SELECT length(arr) AS n, sum(v) FROM t_func_to_subcolumns_use_nulls GROUP BY n WITH ROLLUP HAVING n <= 4 OR isNull(n) ORDER BY n SETTINGS group_by_use_nulls = 1; + +DROP TABLE t_func_to_subcolumns_use_nulls; diff --git a/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference b/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference new file mode 100644 index 00000000000..f5668ed935b --- /dev/null +++ b/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference @@ -0,0 +1,50 @@ +\N None +42 UInt8 +-42 Int8 +42 UInt16 +-42 Int16 +42 UInt32 +-42 Int32 +42 UInt64 +-42 Int64 +42 UInt128 +-42 Int128 +42 UInt256 +-42 Int256 +42.42 Float32 +42.42 Float64 +2020-01-01 Date +2020-01-01 Date32 +2020-01-01 00:00:00 DateTime(\'EST\') +2020-01-01 00:00:00 DateTime(\'CET\') +2020-01-01 00:00:00.000000 DateTime64(6, \'EST\') +2020-01-01 00:00:00.000000 DateTime64(6, \'CET\') +Hello, World! String +aaaaa FixedString(5) +a Enum8(\'c\' = -128, \'a\' = 1, \'b\' = 2) +a Enum16(\'c\' = -1280, \'a\' = 1, \'b\' = 2) +42.42 Decimal(9, 3) +42.42 Decimal(18, 3) +42.42 Decimal(38, 3) +42.42 Decimal(76, 3) +984ac60f-4d08-4ef1-9c62-d82f343fbc90 UUID +[1,2,3] Array(UInt64) +[[[1],[2]],[[3,4,5]]] Array(Array(Array(UInt64))) +(1,'str',42.42) Tuple(UInt32, String, Float32) +(1,'str',42.42) Tuple(a UInt32, b String, c Float32) +(1,('str',(42.42,-30))) Tuple(UInt32, Tuple(String, Tuple(Float32, Int8))) +(1,('str',(42.42,-30))) Tuple(a UInt32, b Tuple(c String, d Tuple(e Float32, f Int8))) +\0 \0\0\0\0\0\0\0\0\0\0\0\0\06364136223846793005 0 123459*\0\0\0\0\0\0\0 AggregateFunction(quantile(0.5), UInt64) +42 SimpleAggregateFunction(sum, UInt64) +Hello, World! LowCardinality(String) +{1:'str1',2:'str2'} Map(UInt64, String) +{1:{1:{1:'str1'}},2:{2:{2:'str2'}}} Map(UInt64, Map(UInt64, Map(UInt64, String))) +127.0.0.0 IPv4 +2001:db8:cafe:1::1 IPv6 +true Bool +[(1,2),(3,4)] Nested(a UInt32, b UInt32) +[(0,0),(10,0),(10,10),(0,10)] Ring +(0,0) Point +[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] Polygon +[[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] MultiPolygon +[{42:(1,[(2,{1:2})])}] Array(Map(UInt8, Tuple(UInt8, Array(Tuple(UInt8, Map(UInt8, UInt8)))))) diff --git a/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh b/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh new file mode 100755 index 00000000000..9b57e5c8718 --- /dev/null +++ b/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "create table test (id UInt64, d Dynamic(max_types=255)) engine=Memory" + +$CLICKHOUSE_CLIENT -q "insert into test select 0, NULL" +$CLICKHOUSE_CLIENT -q "insert into test select 1, materialize(42)::UInt8" +$CLICKHOUSE_CLIENT -q "insert into test select 2, materialize(-42)::Int8" +$CLICKHOUSE_CLIENT -q "insert into test select 3, materialize(42)::UInt16" +$CLICKHOUSE_CLIENT -q "insert into test select 4, materialize(-42)::Int16" +$CLICKHOUSE_CLIENT -q "insert into test select 5, materialize(42)::UInt32" +$CLICKHOUSE_CLIENT -q "insert into test select 6, materialize(-42)::Int32" +$CLICKHOUSE_CLIENT -q "insert into test select 7, materialize(42)::UInt64" +$CLICKHOUSE_CLIENT -q "insert into test select 8, materialize(-42)::Int64" +$CLICKHOUSE_CLIENT -q "insert into test select 9, materialize(42)::UInt128" +$CLICKHOUSE_CLIENT -q "insert into test select 10, materialize(-42)::Int128" +$CLICKHOUSE_CLIENT -q "insert into test select 11, materialize(42)::UInt256" +$CLICKHOUSE_CLIENT -q "insert into test select 12, materialize(-42)::Int256" +$CLICKHOUSE_CLIENT -q "insert into test select 13, materialize(42.42)::Float32" +$CLICKHOUSE_CLIENT -q "insert into test select 14, materialize(42.42)::Float64" +$CLICKHOUSE_CLIENT -q "insert into test select 15, materialize('2020-01-01')::Date" +$CLICKHOUSE_CLIENT -q "insert into test select 16, materialize('2020-01-01')::Date32" +$CLICKHOUSE_CLIENT -q "insert into test select 17, materialize('2020-01-01 00:00:00')::DateTime('EST')" +$CLICKHOUSE_CLIENT -q "insert into test select 18, materialize('2020-01-01 00:00:00')::DateTime('CET')" +$CLICKHOUSE_CLIENT -q "insert into test select 19, materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'EST')" +$CLICKHOUSE_CLIENT -q "insert into test select 20, materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'CET')" +$CLICKHOUSE_CLIENT -q "insert into test select 21, materialize('Hello, World!')" +$CLICKHOUSE_CLIENT -q "insert into test select 22, materialize('aaaaa')::FixedString(5)" +$CLICKHOUSE_CLIENT -q "insert into test select 23, materialize('a')::Enum8('a' = 1, 'b' = 2, 'c' = -128)" +$CLICKHOUSE_CLIENT -q "insert into test select 24, materialize('a')::Enum16('a' = 1, 'b' = 2, 'c' = -1280)" +$CLICKHOUSE_CLIENT -q "insert into test select 25, materialize(42.42)::Decimal32(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 26, materialize(42.42)::Decimal64(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 27, materialize(42.42)::Decimal128(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 28, materialize(42.42)::Decimal256(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 29, materialize('984ac60f-4d08-4ef1-9c62-d82f343fbc90')::UUID" +$CLICKHOUSE_CLIENT -q "insert into test select 30, materialize([1, 2, 3])::Array(UInt64)" +$CLICKHOUSE_CLIENT -q "insert into test select 31, materialize([[[1], [2]], [[3, 4, 5]]])::Array(Array(Array(UInt64)))" +$CLICKHOUSE_CLIENT -q "insert into test select 32, materialize(tuple(1, 'str', 42.42))::Tuple(UInt32, String, Float32)" +$CLICKHOUSE_CLIENT -q "insert into test select 33, materialize(tuple(1, 'str', 42.42))::Tuple(a UInt32, b String, c Float32)" +$CLICKHOUSE_CLIENT -q "insert into test select 34, materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(UInt32, Tuple(String, Tuple(Float32, Int8)))" +$CLICKHOUSE_CLIENT -q "insert into test select 35, materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(a UInt32, b Tuple(c String, d Tuple(e Float32, f Int8)))" +$CLICKHOUSE_CLIENT -q "insert into test select 36, quantileState(0.5)(42::UInt64)" +$CLICKHOUSE_CLIENT -q "insert into test select 37, sumSimpleState(42::UInt64)" +$CLICKHOUSE_CLIENT -q "insert into test select 38, toLowCardinality('Hello, World!')" +$CLICKHOUSE_CLIENT -q "insert into test select 39, materialize(map(1, 'str1', 2, 'str2'))::Map(UInt64, String)" +$CLICKHOUSE_CLIENT -q "insert into test select 40, materialize(map(1, map(1, map(1, 'str1')), 2, map(2, map(2, 'str2'))))::Map(UInt64, Map(UInt64, Map(UInt64, String)))" +$CLICKHOUSE_CLIENT -q "insert into test select 41, materialize('127.0.0.0')::IPv4" +$CLICKHOUSE_CLIENT -q "insert into test select 42, materialize('2001:db8:cafe:1:0:0:0:1')::IPv6" +$CLICKHOUSE_CLIENT -q "insert into test select 43, materialize(true)::Bool" +$CLICKHOUSE_CLIENT -q "insert into test select 44, materialize([tuple(1, 2), tuple(3, 4)])::Nested(a UInt32, b UInt32)" +$CLICKHOUSE_CLIENT -q "insert into test select 45, materialize([(0, 0), (10, 0), (10, 10), (0, 10)])::Ring" +$CLICKHOUSE_CLIENT -q "insert into test select 46, materialize((0, 0))::Point" +$CLICKHOUSE_CLIENT -q "insert into test select 47, materialize([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]])::Polygon" +$CLICKHOUSE_CLIENT -q "insert into test select 48, materialize([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]])::MultiPolygon" +$CLICKHOUSE_CLIENT -q "insert into test select 49, materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])" + +$CLICKHOUSE_CLIENT -q "select * from test format RowBinary" | $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --input-format RowBinary --structure 'id UInt64, d Dynamic(max_types=255)' -q "select d, dynamicType(d) from table order by id" +$CLICKHOUSE_CLIENT -q "drop table test" + diff --git a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh index 8d74ebe1039..22a2fd82c64 100755 --- a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh +++ b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh @@ -1,9 +1,14 @@ #!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: this test relies on the timeouts, it always takes no less that 4 seconds to run CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# system.error_log is created lazy, flush logs query makes it sure that the table is created. +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS;" + # Get the previous number of errors for 111, 222 and 333 errors_111=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 111") errors_222=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 222") @@ -38,4 +43,4 @@ $CLICKHOUSE_CLIENT -mn -q " SELECT sum(value) > $(($errors_111+1)) FROM system.error_log WHERE code = 111; SELECT sum(value) > $(($errors_222+1)) FROM system.error_log WHERE code = 222; SELECT sum(value) > $(($errors_333+1)) FROM system.error_log WHERE code = 333; -" \ No newline at end of file +" diff --git a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.reference b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.reference new file mode 100644 index 00000000000..93018551e1b --- /dev/null +++ b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.reference @@ -0,0 +1,10 @@ +a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever +a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever +--------------------------- +a1451105-722e-4fe7-bfaa-65ad2ae249c2 +a1451105-722e-4fe7-bfaa-65ad2ae249c2 +--------------------------- +a1451105-722e-4fe7-bfaa-65ad2ae249c2 +--------------------------- +a1451105-722e-4fe7-bfaa-65ad2ae249c2 +a1451105-722e-4fe7-bfaa-65ad2ae249c2 diff --git a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh new file mode 100755 index 00000000000..20a29e2734e --- /dev/null +++ b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -nq " + CREATE TABLE ids (id UUID, whatever String) Engine=MergeTree ORDER BY tuple(); + INSERT INTO ids VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', 'whatever'); + + CREATE TABLE data (id UUID, event_time DateTime, status String) Engine=MergeTree ORDER BY tuple(); + INSERT INTO data VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-01', 'CREATED'); + + CREATE TABLE data2 (id UUID, event_time DateTime, status String) Engine=MergeTree ORDER BY tuple(); + INSERT INTO data2 VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-02', 'CREATED'); +" + +$CLICKHOUSE_CLIENT -nq " +SET allow_experimental_analyzer = 1, cluster_for_parallel_replicas = 'parallel_replicas', max_parallel_replicas = 10, allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, max_threads = 1; + +SELECT + id, + whatever +FROM ids AS l +INNER JOIN view( + SELECT * + FROM merge($CLICKHOUSE_DATABASE, 'data.*') +) AS s ON l.id = s.id +WHERE status IN ['CREATED', 'CREATING'] +ORDER BY event_time DESC; + +SELECT '---------------------------'; + +with +results1 as ( + SELECT id + FROM data t1 + inner join ids t2 + on t1.id = t2.id +), +results2 as ( + SELECT id + FROM ids t1 + inner join data t2 + on t1.id = t2.id +) +select * from results1 union all select * from results2; + +SELECT '---------------------------'; + +with +results1 as ( + SELECT id + FROM data t1 + inner join ids t2 + on t1.id = t2.id +), +results2 as ( + SELECT id + FROM ids t1 + inner join data t2 + on t1.id = t2.id +) +select * from results1 t1 inner join results2 t2 using (id); + +SELECT '---------------------------'; + +with +results1 as ( + SELECT t1.id + FROM data t1 + inner join ids t2 on t1.id = t2.id + left join data t3 on t2.id = t3.id +), +results2 as ( + SELECT id + FROM ids t1 + inner join data t2 + on t1.id = t2.id +) +select * from results1 union all select * from results2; +" diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference new file mode 100644 index 00000000000..1ba147f9627 --- /dev/null +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference @@ -0,0 +1,114 @@ +42 UInt8 +42 UInt8 +\N Nullable(Nothing) +\N Nullable(Nothing) +42 UInt8 +42 UInt8 +-42 Int8 +-42 Int8 +42 UInt16 +42 UInt16 +-42 Int16 +-42 Int16 +42 UInt32 +42 UInt32 +-42 Int32 +-42 Int32 +42 UInt64 +42 UInt64 +-42 Int64 +-42 Int64 +42 UInt128 +42 UInt128 +-42 Int128 +-42 Int128 +42 UInt256 +42 UInt256 +-42 Int256 +-42 Int256 +42.42 Float32 +42.42 Float32 +42.42 Float64 +42.42 Float64 +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date32 +2020-01-01 Date32 +2020-01-01 00:00:00 DateTime +2020-01-01 00:00:00 DateTime +2020-01-01 00:00:00 DateTime(\'EST\') +2020-01-01 00:00:00 DateTime(\'EST\') +2020-01-01 00:00:00 DateTime(\'CET\') +2020-01-01 00:00:00 DateTime(\'CET\') +2020-01-01 00:00:00.000000 DateTime64(6) +2020-01-01 00:00:00.000000 DateTime64(6) +2020-01-01 00:00:00.000000 DateTime64(6, \'EST\') +2020-01-01 00:00:00.000000 DateTime64(6, \'EST\') +2020-01-01 00:00:00.000000 DateTime64(6, \'CET\') +2020-01-01 00:00:00.000000 DateTime64(6, \'CET\') +Hello, World! String +Hello, World! String +aaaaa FixedString(5) +aaaaa FixedString(5) +a Enum8(\'c\' = -128, \'a\' = 1, \'b\' = 2) +a Enum8(\'c\' = -128, \'a\' = 1, \'b\' = 2) +a Enum16(\'c\' = -1280, \'a\' = 1, \'b\' = 2) +a Enum16(\'c\' = -1280, \'a\' = 1, \'b\' = 2) +42.42 Decimal(9, 3) +42.42 Decimal(9, 3) +42.42 Decimal(18, 3) +42.42 Decimal(18, 3) +42.42 Decimal(38, 3) +42.42 Decimal(38, 3) +42.42 Decimal(76, 3) +42.42 Decimal(76, 3) +984ac60f-4d08-4ef1-9c62-d82f343fbc90 UUID +984ac60f-4d08-4ef1-9c62-d82f343fbc90 UUID +[1,2,3] Array(UInt64) +[1,2,3] Array(UInt64) +[[[1],[2]],[[3,4,5]]] Array(Array(Array(UInt64))) +[[[1],[2]],[[3,4,5]]] Array(Array(Array(UInt64))) +(1,'str',42.42) Tuple(UInt32, String, Float32) +(1,'str',42.42) Tuple(UInt32, String, Float32) +(1,'str',42.42) Tuple(\n a UInt32,\n b String,\n c Float32) +(1,'str',42.42) Tuple(\n a UInt32,\n b String,\n c Float32) +(1,('str',(42.42,-30))) Tuple(UInt32, Tuple(String, Tuple(Float32, Int8))) +(1,('str',(42.42,-30))) Tuple(UInt32, Tuple(String, Tuple(Float32, Int8))) +(1,('str',(42.42,-30))) Tuple(\n a UInt32,\n b Tuple(\n c String,\n d Tuple(\n e Float32,\n f Int8))) +(1,('str',(42.42,-30))) Tuple(\n a UInt32,\n b Tuple(\n c String,\n d Tuple(\n e Float32,\n f Int8))) +\0 \0\0\0\0\0\0\0\0\0\0\0\0\06364136223846793005 0 123459*\0\0\0\0\0\0\0 AggregateFunction(quantile(0.5), UInt64) +\0 \0\0\0\0\0\0\0\0\0\0\0\0\06364136223846793005 0 123459*\0\0\0\0\0\0\0 AggregateFunction(quantile(0.5), UInt64) +42 SimpleAggregateFunction(sum, UInt64) +42 SimpleAggregateFunction(sum, UInt64) +Hello, World! LowCardinality(String) +Hello, World! LowCardinality(String) +{1:'str1',2:'str2'} Map(UInt64, String) +{1:'str1',2:'str2'} Map(UInt64, String) +{1:{1:{1:'str1'}},2:{2:{2:'str2'}}} Map(UInt64, Map(UInt64, Map(UInt64, String))) +{1:{1:{1:'str1'}},2:{2:{2:'str2'}}} Map(UInt64, Map(UInt64, Map(UInt64, String))) +127.0.0.0 IPv4 +127.0.0.0 IPv4 +2001:db8:cafe:1::1 IPv6 +2001:db8:cafe:1::1 IPv6 +true Bool +true Bool +[(1,2),(3,4)] Nested(a UInt32, b UInt32) +[(1,2),(3,4)] Nested(a UInt32, b UInt32) +[(0,0),(10,0),(10,10),(0,10)] Ring +[(0,0),(10,0),(10,10),(0,10)] Ring +(0,0) Point +(0,0) Point +[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] Polygon +[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] Polygon +[[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] MultiPolygon +[[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] MultiPolygon +[{42:(1,[(2,{1:2})])}] Array(Map(UInt8, Tuple(UInt8, Array(Tuple(UInt8, Map(UInt8, UInt8)))))) +[{42:(1,[(2,{1:2})])}] Array(Map(UInt8, Tuple(UInt8, Array(Tuple(UInt8, Map(UInt8, UInt8)))))) +42 Variant(String, Tuple(\n a UInt32,\n b Array(Map(String, String))), UInt32) +42 Variant(String, Tuple(\n a UInt32,\n b Array(Map(String, String))), UInt32) +[{42:(1,[(2,{1:2})])}] Dynamic +[{42:(1,[(2,{1:2})])}] Dynamic +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=10) +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=10) +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=255) +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=255) diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh new file mode 100755 index 00000000000..723b11ad620 --- /dev/null +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function test +{ + $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_binary_encode_types_in_binary_format=1 -q "select $1 as value format RowBinaryWithNamesAndTypes" | $CLICKHOUSE_LOCAL --input-format RowBinaryWithNamesAndTypes --input_format_binary_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" + $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_native_encode_types_in_binary_format=1 -q "select $1 as value format Native" | $CLICKHOUSE_LOCAL --input-format Native --input_format_native_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" +} + +test "materialize(42)::UInt8" +test "NULL" +test "materialize(42)::UInt8" +test "materialize(-42)::Int8" +test "materialize(42)::UInt16" +test "materialize(-42)::Int16" +test "materialize(42)::UInt32" +test "materialize(-42)::Int32" +test "materialize(42)::UInt64" +test "materialize(-42)::Int64" +test "materialize(42)::UInt128" +test "materialize(-42)::Int128" +test "materialize(42)::UInt256" +test "materialize(-42)::Int256" +test "materialize(42.42)::Float32" +test "materialize(42.42)::Float64" +test "materialize('2020-01-01')::Date" +test "materialize('2020-01-01')::Date32" +test "materialize('2020-01-01 00:00:00')::DateTime" +test "materialize('2020-01-01 00:00:00')::DateTime('EST')" +test "materialize('2020-01-01 00:00:00')::DateTime('CET')" +test "materialize('2020-01-01 00:00:00.000000')::DateTime64(6)" +test "materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'EST')" +test "materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'CET')" +test "materialize('Hello, World!')" +test "materialize('aaaaa')::FixedString(5)" +test "materialize('a')::Enum8('a' = 1, 'b' = 2, 'c' = -128)" +test "materialize('a')::Enum16('a' = 1, 'b' = 2, 'c' = -1280)" +test "materialize(42.42)::Decimal32(3)" +test "materialize(42.42)::Decimal64(3)" +test "materialize(42.42)::Decimal128(3)" +test "materialize(42.42)::Decimal256(3)" +test "materialize('984ac60f-4d08-4ef1-9c62-d82f343fbc90')::UUID" +test "materialize([1, 2, 3])::Array(UInt64)" +test "materialize([[[1], [2]], [[3, 4, 5]]])::Array(Array(Array(UInt64)))" +test "materialize(tuple(1, 'str', 42.42))::Tuple(UInt32, String, Float32)" +test "materialize(tuple(1, 'str', 42.42))::Tuple(a UInt32, b String, c Float32)" +test "materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(UInt32, Tuple(String, Tuple(Float32, Int8)))" +test "materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(a UInt32, b Tuple(c String, d Tuple(e Float32, f Int8)))" +test "quantileState(0.5)(42::UInt64)" +test "sumSimpleState(42::UInt64)" +test "toLowCardinality('Hello, World!')" +test "materialize(map(1, 'str1', 2, 'str2'))::Map(UInt64, String)" +test "materialize(map(1, map(1, map(1, 'str1')), 2, map(2, map(2, 'str2'))))::Map(UInt64, Map(UInt64, Map(UInt64, String)))" +test "materialize('127.0.0.0')::IPv4" +test "materialize('2001:db8:cafe:1:0:0:0:1')::IPv6" +test "materialize(true)::Bool" +test "materialize([tuple(1, 2), tuple(3, 4)])::Nested(a UInt32, b UInt32)" +test "materialize([(0, 0), (10, 0), (10, 10), (0, 10)])::Ring" +test "materialize((0, 0))::Point" +test "materialize([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]])::Polygon" +test "materialize([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]])::MultiPolygon" +test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])" +test "materialize(42::UInt32)::Variant(UInt32, String, Tuple(a UInt32, b Array(Map(String, String))))" +test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic" +test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic(max_types=10)" +test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic(max_types=255)" diff --git a/tests/queries/0_stateless/03173_set_transformed_partition_pruning.reference b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.reference new file mode 100644 index 00000000000..3a6727b70e8 --- /dev/null +++ b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.reference @@ -0,0 +1,50 @@ +-- Single partition by function +0 +2 +-- Nested partition by function +1 +2 +1 +1 +-- Nested partition by function, LowCardinality +1 +2 +1 +1 +-- Nested partition by function, Nullable +1 +2 +1 +1 +-- Nested partition by function, LowCardinality + Nullable +1 +2 +1 +1 +-- Non-safe cast +2 +2 +-- Multiple partition columns +1 +1 +1 +2 +-- LowCardinality set +1 +1 +-- Nullable set +1 +1 +-- LowCardinality + Nullable set +1 +1 +-- Not failing with date parsing functions +1 +0 +-- Pruning + not failing with nested date parsing functions +1 +2 +0 +-- Empty transform functions +2 +1 diff --git a/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql new file mode 100644 index 00000000000..8ffabacaa8c --- /dev/null +++ b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql @@ -0,0 +1,258 @@ +SELECT '-- Single partition by function'; + +DROP TABLE IF EXISTS 03173_single_function; +CREATE TABLE 03173_single_function ( + dt Date, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY toMonth(dt); + +INSERT INTO 03173_single_function +SELECT toDate('2000-01-01') + 10 * number FROM numbers(50) +UNION ALL +SELECT toDate('2100-01-01') + 10 * number FROM numbers(50); +OPTIMIZE TABLE 03173_single_function FINAL; + +SELECT count() FROM 03173_single_function WHERE dt IN ('2024-01-20', '2024-05-25') SETTINGS log_comment='03173_single_function'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_single_function'; + +DROP TABLE IF EXISTS 03173_single_function; + +SELECT '-- Nested partition by function'; + +DROP TABLE IF EXISTS 03173_nested_function; +CREATE TABLE 03173_nested_function( + id Int32, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_nested_function SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function FINAL; + +SELECT count() FROM 03173_nested_function WHERE id IN (10) SETTINGS log_comment='03173_nested_function'; +SELECT count() FROM 03173_nested_function WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function'; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr'; + +DROP TABLE IF EXISTS 03173_nested_function; + +SELECT '-- Nested partition by function, LowCardinality'; + +SET allow_suspicious_low_cardinality_types = 1; + +DROP TABLE IF EXISTS 03173_nested_function_lc; +CREATE TABLE 03173_nested_function_lc( + id LowCardinality(Int32), +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_nested_function_lc SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function_lc FINAL; + +SELECT count() FROM 03173_nested_function_lc WHERE id IN (10) SETTINGS log_comment='03173_nested_function_lc'; +SELECT count() FROM 03173_nested_function_lc WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_lc'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_lc'; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_lc'; + +DROP TABLE IF EXISTS 03173_nested_function_lc; + +SELECT '-- Nested partition by function, Nullable'; + +DROP TABLE IF EXISTS 03173_nested_function_null; +CREATE TABLE 03173_nested_function_null( + id Nullable(Int32), +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3 +SETTINGS allow_nullable_key=1; + +INSERT INTO 03173_nested_function_null SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function_null FINAL; + +SELECT count() FROM 03173_nested_function_null WHERE id IN (10) SETTINGS log_comment='03173_nested_function_null'; +SELECT count() FROM 03173_nested_function_null WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_null'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_null'; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_null'; + +DROP TABLE IF EXISTS 03173_nested_function_null; + +SELECT '-- Nested partition by function, LowCardinality + Nullable'; + +DROP TABLE IF EXISTS 03173_nested_function_lc_null; + +SET allow_suspicious_low_cardinality_types = 1; +CREATE TABLE 03173_nested_function_lc_null( + id LowCardinality(Nullable(Int32)), +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3 +SETTINGS allow_nullable_key=1; + +INSERT INTO 03173_nested_function_lc_null SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function_lc_null FINAL; + +SELECT count() FROM 03173_nested_function_lc_null WHERE id IN (10) SETTINGS log_comment='03173_nested_function_lc_null'; +SELECT count() FROM 03173_nested_function_lc_null WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_lc_null'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_lc_null'; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_lc_null'; + +DROP TABLE IF EXISTS 03173_nested_function_lc_null; + +SELECT '-- Non-safe cast'; + +DROP TABLE IF EXISTS 03173_nonsafe_cast; +CREATE TABLE 03173_nonsafe_cast( + id Int64, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_nonsafe_cast SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nonsafe_cast FINAL; + +SELECT count() FROM 03173_nonsafe_cast WHERE id IN (SELECT '50' UNION ALL SELECT '99') SETTINGS log_comment='03173_nonsafe_cast'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nonsafe_cast'; + +DROP TABLE IF EXISTS 03173_nonsafe_cast; + +SELECT '-- Multiple partition columns'; + +DROP TABLE IF EXISTS 03173_multiple_partition_cols; +CREATE TABLE 03173_multiple_partition_cols ( + key1 Int32, + key2 Int32 +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY (intDiv(key1, 50), xxHash32(key2) % 3); + +INSERT INTO 03173_multiple_partition_cols SELECT number, number FROM numbers(100); +OPTIMIZE TABLE 03173_multiple_partition_cols FINAL; + +SELECT count() FROM 03173_multiple_partition_cols WHERE key2 IN (4) SETTINGS log_comment='03173_multiple_columns'; +SELECT count() FROM 03173_multiple_partition_cols WHERE xxHash32(key2) IN (4251411170) SETTINGS log_comment='03173_multiple_columns_subexpr'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_multiple_columns'; +-- Due to xxHash32() in WHERE condition, MinMax is unable to eliminate any parts, +-- so partition pruning leave two parts (for key1 // 50 = 0 and key1 // 50 = 1) +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_multiple_columns_subexpr'; + +-- Preparing base table for filtering by LowCardinality/Nullable sets +DROP TABLE IF EXISTS 03173_base_data_source; +CREATE TABLE 03173_base_data_source( + id Int32, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_base_data_source SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_base_data_source FINAL; + +SELECT '-- LowCardinality set'; + +SET allow_suspicious_low_cardinality_types = 1; +DROP TABLE IF EXISTS 03173_low_cardinality_set; +CREATE TABLE 03173_low_cardinality_set (id LowCardinality(Int32)) ENGINE=Memory AS SELECT 10; + +SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_low_cardinality_set) SETTINGS log_comment='03173_low_cardinality_set'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_low_cardinality_set'; + +DROP TABLE IF EXISTS 03173_low_cardinality_set; + +SELECT '-- Nullable set'; + +DROP TABLE IF EXISTS 03173_nullable_set; +CREATE TABLE 03173_nullable_set (id Nullable(Int32)) ENGINE=Memory AS SELECT 10; + +SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_nullable_set) SETTINGS log_comment='03173_nullable_set'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nullable_set'; + +DROP TABLE IF EXISTS 03173_nullable_set; + +SELECT '-- LowCardinality + Nullable set'; + +DROP TABLE IF EXISTS 03173_lc_nullable_set; +CREATE TABLE 03173_lc_nullable_set (id LowCardinality(Nullable(Int32))) ENGINE=Memory AS SELECT 10 UNION ALL SELECT NULL; + +SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_lc_nullable_set) SETTINGS log_comment='03173_lc_nullable_set'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_lc_nullable_set'; + +DROP TABLE IF EXISTS 03173_lc_nullable_set; + +SELECT '-- Not failing with date parsing functions'; + +DROP TABLE IF EXISTS 03173_date_parsing; +CREATE TABLE 03173_date_parsing ( + id String +) +ENGINE=MergeTree +ORDER BY tuple() +PARTITION BY toDate(id); + +INSERT INTO 03173_date_parsing +SELECT toString(toDate('2023-04-01') + number) +FROM numbers(20); + +SELECT count() FROM 03173_date_parsing WHERE id IN ('2023-04-02', '2023-05-02'); +SELECT count() FROM 03173_date_parsing WHERE id IN ('not a date'); + +DROP TABLE IF EXISTS 03173_date_parsing; + +SELECT '-- Pruning + not failing with nested date parsing functions'; + +DROP TABLE IF EXISTS 03173_nested_date_parsing; +CREATE TABLE 03173_nested_date_parsing ( + id String +) +ENGINE=MergeTree +ORDER BY tuple() +PARTITION BY toMonth(toDate(id)); + +INSERT INTO 03173_nested_date_parsing +SELECT toString(toDate('2000-01-01') + 10 * number) FROM numbers(50) +UNION ALL +SELECT toString(toDate('2100-01-01') + 10 * number) FROM numbers(50); + +SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('2000-01-21', '2023-05-02') SETTINGS log_comment='03173_nested_date_parsing'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_date_parsing'; +SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('not a date'); + +DROP TABLE IF EXISTS 03173_nested_date_parsing; + +SELECT '-- Empty transform functions'; + +DROP TABLE IF EXISTS 03173_empty_transform; +CREATE TABLE 03173_empty_transform( + id Int32, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_empty_transform SELECT number FROM numbers(6); +OPTIMIZE TABLE 03173_empty_transform FINAL; + +SELECT id FROM 03173_empty_transform WHERE xxHash32(id) % 3 IN (xxHash32(2::Int32) % 3) SETTINGS log_comment='03173_empty_transform'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_empty_transform'; + +DROP TABLE IF EXISTS 03173_empty_transform; diff --git a/tests/queries/0_stateless/03174_projection_deduplicate.reference b/tests/queries/0_stateless/03174_projection_deduplicate.reference new file mode 100644 index 00000000000..1796b2f1dee --- /dev/null +++ b/tests/queries/0_stateless/03174_projection_deduplicate.reference @@ -0,0 +1,3 @@ +1 one +1 one +1 one diff --git a/tests/queries/0_stateless/03174_projection_deduplicate.sql b/tests/queries/0_stateless/03174_projection_deduplicate.sql new file mode 100644 index 00000000000..46222b69dc7 --- /dev/null +++ b/tests/queries/0_stateless/03174_projection_deduplicate.sql @@ -0,0 +1,30 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/65548 +DROP TABLE IF EXISTS test_projection_deduplicate; + +CREATE TABLE test_projection_deduplicate +( + `id` Int32, + `string` String, + PROJECTION test_projection + ( + SELECT id + GROUP BY id + ) +) +ENGINE = MergeTree +PRIMARY KEY id; + +INSERT INTO test_projection_deduplicate VALUES (1, 'one'); +INSERT INTO test_projection_deduplicate VALUES (1, 'one'); + +OPTIMIZE TABLE test_projection_deduplicate DEDUPLICATE; -- { serverError NOT_IMPLEMENTED } + +SELECT * FROM test_projection_deduplicate; + +ALTER TABLE test_projection_deduplicate DROP PROJECTION test_projection; + +OPTIMIZE TABLE test_projection_deduplicate DEDUPLICATE; + +SELECT * FROM test_projection_deduplicate; + +DROP TABLE test_projection_deduplicate; diff --git a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql new file mode 100644 index 00000000000..5aa3e4c2e0c --- /dev/null +++ b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, ALL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, INNER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, FULL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, ALL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, INNER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, FULL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANY, FULL, a); -- { serverError NOT_IMPLEMENTED } diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference new file mode 100644 index 00000000000..33b8cd6ee26 --- /dev/null +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference @@ -0,0 +1,3 @@ +-- bitShiftRight +-- bitShiftLeft +OK diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql new file mode 100644 index 00000000000..aec01753673 --- /dev/null +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql @@ -0,0 +1,17 @@ +SELECT '-- bitShiftRight'; +SELECT bitShiftRight(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } + +SELECT '-- bitShiftLeft'; +SELECT bitShiftLeft(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } + +SELECT 'OK'; \ No newline at end of file diff --git a/tests/queries/0_stateless/03198_json_extract_more_types.reference b/tests/queries/0_stateless/03198_json_extract_more_types.reference new file mode 100644 index 00000000000..9a6580ff81b --- /dev/null +++ b/tests/queries/0_stateless/03198_json_extract_more_types.reference @@ -0,0 +1,21 @@ +2020-01-01 +2020-01-01 +2020-01-01 00:00:00 +2020-01-01 00:00:00.000000 +127.0.0.1 +2001:db8:85a3::8a2e:370:7334 +42 +42 +42 +42 +42 +42 +42 +42 +42 +42 +Hello +Hello +\0\0\0 +Hello\0\0\0\0\0 +5801c962-1182-458a-89f8-d077da5074f9 diff --git a/tests/queries/0_stateless/03198_json_extract_more_types.sql b/tests/queries/0_stateless/03198_json_extract_more_types.sql new file mode 100644 index 00000000000..28d24bbb271 --- /dev/null +++ b/tests/queries/0_stateless/03198_json_extract_more_types.sql @@ -0,0 +1,29 @@ +set allow_suspicious_low_cardinality_types=1; + +select JSONExtract('{"a" : "2020-01-01"}', 'a', 'Date'); +select JSONExtract('{"a" : "2020-01-01"}', 'a', 'Date32'); +select JSONExtract('{"a" : "2020-01-01 00:00:00"}', 'a', 'DateTime'); +select JSONExtract('{"a" : "2020-01-01 00:00:00.000000"}', 'a', 'DateTime64(6)'); +select JSONExtract('{"a" : "127.0.0.1"}', 'a', 'IPv4'); +select JSONExtract('{"a" : "2001:0db8:85a3:0000:0000:8a2e:0370:7334"}', 'a', 'IPv6'); + + +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt8)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int8)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt16)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int16)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt32)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int32)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt64)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int64)'); + +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Float32)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Float32)'); + +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(String)'); +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(FixedString(5))'); +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(FixedString(3))'); +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(FixedString(10))'); + +select JSONExtract('{"a" : "5801c962-1182-458a-89f8-d077da5074f9"}', 'a', 'LowCardinality(UUID)'); + diff --git a/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference new file mode 100644 index 00000000000..fcd78da1283 --- /dev/null +++ b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference @@ -0,0 +1,2 @@ +1000000 +1000000 diff --git a/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql new file mode 100644 index 00000000000..25798ef6d33 --- /dev/null +++ b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS data_02051__fuzz_24; + +CREATE TABLE data_02051__fuzz_24 (`key` Int16, `value` String) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, min_bytes_for_wide_part=0 AS SELECT number, repeat(toString(number), 5) FROM numbers(1000000.); + +SELECT count(ignore(*)) FROM data_02051__fuzz_24 PREWHERE materialize(1) GROUP BY ignore(*); + +detach table data_02051__fuzz_24; +attach table data_02051__fuzz_24; + +SELECT count(ignore(*)) FROM data_02051__fuzz_24 PREWHERE materialize(1) GROUP BY ignore(*); + +DROP TABLE data_02051__fuzz_24; diff --git a/tests/queries/0_stateless/03198_orc_read_time_zone.reference b/tests/queries/0_stateless/03198_orc_read_time_zone.reference new file mode 100644 index 00000000000..809dba44400 --- /dev/null +++ b/tests/queries/0_stateless/03198_orc_read_time_zone.reference @@ -0,0 +1 @@ +1 2024-06-30 20:00:00.000 diff --git a/tests/queries/0_stateless/03198_orc_read_time_zone.sh b/tests/queries/0_stateless/03198_orc_read_time_zone.sh new file mode 100755 index 00000000000..27530c06237 --- /dev/null +++ b/tests/queries/0_stateless/03198_orc_read_time_zone.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT -q "create table test(id UInt64, t DateTime64) Engine=MergeTree order by id" +$CLICKHOUSE_CLIENT -q "insert into test from infile '$CURDIR/data_orc/test_reader_time_zone.snappy.orc' SETTINGS input_format_orc_read_use_writer_time_zone=true FORMAT ORC" +$CLICKHOUSE_CLIENT -q "select * from test SETTINGS session_timezone='Asia/Shanghai'" +$CLICKHOUSE_CLIENT -q "drop table test" \ No newline at end of file diff --git a/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.reference b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.reference new file mode 100644 index 00000000000..aecacd10e00 --- /dev/null +++ b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.reference @@ -0,0 +1,28 @@ +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +a Nullable(Int64) +b Nullable(Int64) +c Nullable(Int64) +2 +a Nullable(Int64) +b Nullable(Int64) +c Nullable(Int64) +a Nullable(String) +b Nullable(Int64) +c Nullable(Int64) +2 +a Nullable(String) +b Nullable(Int64) +c Nullable(Int64) +a Tuple(Nullable(Int64), Nullable(Int64), Nullable(Int64)) +b Nullable(Int64) +c Nullable(Int64) +2 +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +a Nullable(Int64) +b Nullable(Int64) +c Nullable(Int64) +2 diff --git a/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh new file mode 100755 index 00000000000..ce53f467823 --- /dev/null +++ b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo -e 'a,b,c\n1,2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv +$CLICKHOUSE_LOCAL -nm -q " +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_skip_first_lines=1; +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_skip_first_lines=0; +SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%skip_first_lines%';" + +echo -e 'a,b,c\n"1",2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv +$CLICKHOUSE_LOCAL -nm -q " +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_numbers_from_strings=1; +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_numbers_from_strings=0; +SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%try_infer_numbers_from_strings%';" + +echo -e 'a,b,c\n"(1,2,3)",2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv +$CLICKHOUSE_LOCAL -nm -q " +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_strings_from_quoted_tuples=1; +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_strings_from_quoted_tuples=0; +SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%try_infer_strings_from_quoted_tuples%';" + +echo -e 'a\tb\tc\n1\t2\t3' > $CLICKHOUSE_TEST_UNIQUE_NAME.tsv +$CLICKHOUSE_LOCAL -nm -q " +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.tsv') SETTINGS input_format_tsv_skip_first_lines=1; +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.tsv') SETTINGS input_format_tsv_skip_first_lines=0; +SELECT count() from system.schema_inference_cache where format = 'TSV' and additional_format_info like '%skip_first_lines%';" + + +rm $CLICKHOUSE_TEST_UNIQUE_NAME.csv +rm $CLICKHOUSE_TEST_UNIQUE_NAME.tsv + diff --git a/tests/queries/0_stateless/03198_table_function_directory_path.reference b/tests/queries/0_stateless/03198_table_function_directory_path.reference new file mode 100644 index 00000000000..19920de3d3c --- /dev/null +++ b/tests/queries/0_stateless/03198_table_function_directory_path.reference @@ -0,0 +1,3 @@ +2 +2 +1 diff --git a/tests/queries/0_stateless/03198_table_function_directory_path.sql b/tests/queries/0_stateless/03198_table_function_directory_path.sql new file mode 100644 index 00000000000..9e2791847af --- /dev/null +++ b/tests/queries/0_stateless/03198_table_function_directory_path.sql @@ -0,0 +1,13 @@ +-- Tags: no-parallel + +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/1.csv', 'csv') SELECT '1.csv' SETTINGS engine_file_truncate_on_insert=1; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/2.csv', 'csv') SELECT '2.csv' SETTINGS engine_file_truncate_on_insert=1; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir/3.csv', 'csv') SELECT '3.csv' SETTINGS engine_file_truncate_on_insert=1; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir1/dir/4.csv', 'csv') SELECT '4.csv' SETTINGS engine_file_truncate_on_insert=1; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir2/dir/5.csv', 'csv') SELECT '5.csv' SETTINGS engine_file_truncate_on_insert=1; + +SELECT COUNT(*) FROM file('data_03198_table_function_directory_path'); +SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/'); +SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/dir'); +SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/*/dir', 'csv'); -- { serverError 74, 636 } +SELECT COUNT(*) FROM file('data_03198_table_function_directory_pat'); -- { serverError 400 } diff --git a/tests/queries/0_stateless/03198_unload_primary_key_outdated.reference b/tests/queries/0_stateless/03198_unload_primary_key_outdated.reference new file mode 100644 index 00000000000..28655f938ba --- /dev/null +++ b/tests/queries/0_stateless/03198_unload_primary_key_outdated.reference @@ -0,0 +1,3 @@ +all_1_1_0 1 16 +all_1_1_0 0 0 +all_1_1_0_2 1 16 diff --git a/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh b/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh new file mode 100755 index 00000000000..4f217935123 --- /dev/null +++ b/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -n " + DROP TABLE IF EXISTS t_unload_primary_key; + + CREATE TABLE t_unload_primary_key (a UInt64, b UInt64) + ENGINE = MergeTree ORDER BY a + SETTINGS old_parts_lifetime = 10000; + + INSERT INTO t_unload_primary_key VALUES (1, 1); + + SELECT name, active, primary_key_bytes_in_memory FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_unload_primary_key' ORDER BY name; + + ALTER TABLE t_unload_primary_key UPDATE b = 100 WHERE 1 SETTINGS mutations_sync = 2; +" + +for _ in {1..100}; do + res=$($CLICKHOUSE_CLIENT -q "SELECT primary_key_bytes_in_memory FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_unload_primary_key' AND name = 'all_1_1_0'") + if [[ $res -eq 0 ]]; then + break + fi + sleep 0.3 +done + +$CLICKHOUSE_CLIENT -n " + SELECT name, active, primary_key_bytes_in_memory FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_unload_primary_key' ORDER BY name; + DROP TABLE IF EXISTS t_unload_primary_key; +" diff --git a/tests/queries/0_stateless/03199_fix_auc_tie_handling.reference b/tests/queries/0_stateless/03199_fix_auc_tie_handling.reference new file mode 100644 index 00000000000..f35b39d5972 --- /dev/null +++ b/tests/queries/0_stateless/03199_fix_auc_tie_handling.reference @@ -0,0 +1,3 @@ +nan +0.58333 +0.58333 diff --git a/tests/queries/0_stateless/03199_fix_auc_tie_handling.sql b/tests/queries/0_stateless/03199_fix_auc_tie_handling.sql new file mode 100644 index 00000000000..5de0844f445 --- /dev/null +++ b/tests/queries/0_stateless/03199_fix_auc_tie_handling.sql @@ -0,0 +1,32 @@ +CREATE TABLE labels_unordered +( + idx Int64, + score Float64, + label Int64 +) +ENGINE = MergeTree +PRIMARY KEY idx +ORDER BY idx; + +SELECT floor(arrayAUC(array_concat_agg([score]), array_concat_agg([label])), 5) +FROM labels_unordered; + +INSERT INTO labels_unordered (idx,score,label) VALUES (1,0.1,0), (2,0.35,1), (3,0.4,0), (4,0.8,1), (5,0.8,0); + +SELECT floor(arrayAUC(array_concat_agg([score]), array_concat_agg([label])), 5) +FROM labels_unordered; + +CREATE TABLE labels_ordered +( + idx Int64, + score Float64, + label Int64 +) +ENGINE = MergeTree +PRIMARY KEY idx +ORDER BY idx; + +INSERT INTO labels_ordered (idx,score,label) VALUES (1,0.1,0), (2,0.35,1), (3,0.4,0), (4,0.8,0), (5,0.8,1); + +SELECT floor(arrayAUC(array_concat_agg([score]), array_concat_agg([label])), 5) +FROM labels_ordered; \ No newline at end of file diff --git a/tests/queries/0_stateless/01487_distributed_in_not_default_db.reference b/tests/queries/0_stateless/03199_has_lc_fixed_string.reference similarity index 100% rename from tests/queries/0_stateless/01487_distributed_in_not_default_db.reference rename to tests/queries/0_stateless/03199_has_lc_fixed_string.reference index 0d66ea1aee9..b261da18d51 100644 --- a/tests/queries/0_stateless/01487_distributed_in_not_default_db.reference +++ b/tests/queries/0_stateless/03199_has_lc_fixed_string.reference @@ -1,2 +1,2 @@ -0 1 +0 diff --git a/tests/queries/0_stateless/03199_has_lc_fixed_string.sql b/tests/queries/0_stateless/03199_has_lc_fixed_string.sql new file mode 100644 index 00000000000..3cb551804b7 --- /dev/null +++ b/tests/queries/0_stateless/03199_has_lc_fixed_string.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS 03199_fixedstring_array; +CREATE TABLE 03199_fixedstring_array (arr Array(LowCardinality(FixedString(8)))) ENGINE = Memory; +INSERT INTO 03199_fixedstring_array VALUES (['a', 'b']), (['c', 'd']); + +SELECT has(arr, toFixedString(materialize('a'), 1)) FROM 03199_fixedstring_array; + +DROP TABLE 03199_fixedstring_array; diff --git a/tests/queries/0_stateless/03199_join_with_materialized_column.reference b/tests/queries/0_stateless/03199_join_with_materialized_column.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03199_join_with_materialized_column.sql b/tests/queries/0_stateless/03199_join_with_materialized_column.sql new file mode 100644 index 00000000000..8c53c5b3e66 --- /dev/null +++ b/tests/queries/0_stateless/03199_join_with_materialized_column.sql @@ -0,0 +1,6 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS table_with_materialized; +CREATE TABLE table_with_materialized (col String MATERIALIZED 'A') ENGINE = Memory; +SELECT number FROM numbers(1) AS n, table_with_materialized; +DROP TABLE table_with_materialized; diff --git a/tests/queries/0_stateless/03199_json_extract_dynamic.reference b/tests/queries/0_stateless/03199_json_extract_dynamic.reference new file mode 100644 index 00000000000..759b7763cd1 --- /dev/null +++ b/tests/queries/0_stateless/03199_json_extract_dynamic.reference @@ -0,0 +1,30 @@ +true Bool +42 Int64 +-42 Int64 +18446744073709551615 UInt64 +42.42 Float64 +42 Int64 +-42 Int64 +18446744073709551615 UInt64 +Hello String +2020-01-01 Date +2020-01-01 00:00:00.000000000 DateTime64(9) +[1,2,3] Array(Nullable(Int64)) +['str1','str2','str3'] Array(Nullable(String)) +[[[1],[2,3,4]],[[5,6],[7]]] Array(Array(Array(Nullable(Int64)))) +['2020-01-01 00:00:00.000000000','2020-01-01 00:00:00.000000000'] Array(Nullable(DateTime64(9))) +['2020-01-01','2020-01-01 date'] Array(Nullable(String)) +['2020-01-01','2020-01-01 00:00:00','str'] Array(Nullable(String)) +['2020-01-01','2020-01-01 00:00:00','42'] Array(Nullable(String)) +['str','42'] Array(Nullable(String)) +[42,42.42] Array(Nullable(Float64)) +[42,18446744073709552000,42.42] Array(Nullable(Float64)) +[42,42.42] Array(Nullable(Float64)) +[NULL,NULL] Array(Nullable(String)) +[NULL,42] Array(Nullable(Int64)) +[[NULL],[],[42]] Array(Array(Nullable(Int64))) +[[],[NULL,NULL],[1,NULL,3],[NULL,2,NULL]] Array(Array(Nullable(Int64))) +[[],[NULL,NULL],['1',NULL,'3'],[NULL,'2',NULL],['2020-01-01']] Array(Array(Nullable(String))) +('str',42,[42]) Tuple(Nullable(String), Nullable(Int64), Array(Nullable(Int64))) +[42,18446744073709551615] Array(Nullable(UInt64)) +(-42,18446744073709551615) Tuple(Nullable(Int64), Nullable(UInt64)) diff --git a/tests/queries/0_stateless/03199_json_extract_dynamic.sql b/tests/queries/0_stateless/03199_json_extract_dynamic.sql new file mode 100644 index 00000000000..286949f4d3e --- /dev/null +++ b/tests/queries/0_stateless/03199_json_extract_dynamic.sql @@ -0,0 +1,37 @@ +set input_format_json_try_infer_numbers_from_strings=1; + +select JSONExtract(materialize('{"d" : true}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : 42}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : -42}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : 18446744073709551615}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : 42.42}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "42"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "-42"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "18446744073709551615"}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : "Hello"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "2020-01-01"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "2020-01-01 00:00:00.000"}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : [1, 2, 3]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["str1", "str2", "str3"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [[[1], [2, 3, 4]], [[5, 6], [7]]]}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 00:00:00"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 date"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 00:00:00", "str"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 00:00:00", "42"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["str", "42"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 42.42]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 18446744073709551615, 42.42]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 42.42]}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : [null, null]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [null, 42]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [[null], [], [42]]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"a" : [[], [null, null], ["1", null, "3"], [null, "2", null]]}'), 'a', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"a" : [[], [null, null], ["1", null, "3"], [null, "2", null], ["2020-01-01"]]}'), 'a', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : ["str", 42, [42]]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 18446744073709551615]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [-42, 18446744073709551615]}'), 'd', 'Dynamic') as d, dynamicType(d); diff --git a/tests/queries/0_stateless/03199_merge_filters_bug.reference b/tests/queries/0_stateless/03199_merge_filters_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03199_merge_filters_bug.sql b/tests/queries/0_stateless/03199_merge_filters_bug.sql new file mode 100644 index 00000000000..ed2ec2ea217 --- /dev/null +++ b/tests/queries/0_stateless/03199_merge_filters_bug.sql @@ -0,0 +1,70 @@ +drop table if exists t1; +drop table if exists t2; + +CREATE TABLE t1 +( + `s1` String, + `s2` String, + `s3` String +) +ENGINE = MergeTree +ORDER BY tuple(); + + +CREATE TABLE t2 +( + `fs1` FixedString(10), + `fs2` FixedString(10) +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO t1 SELECT + repeat('t', 15) s1, + 'test' s2, + 'test' s3; + +INSERT INTO t1 SELECT + substring(s1, 1, 10), + s2, + s3 +FROM generateRandom('s1 String, s2 String, s3 String') +LIMIT 10000; + +INSERT INTO t2 SELECT * +FROM generateRandom() +LIMIT 10000; + +WITH +tmp1 AS +( + SELECT + CAST(s1, 'FixedString(10)') AS fs1, + s2 AS sector, + s3 + FROM t1 + WHERE (s3 != 'test') +) + SELECT + fs1 + FROM t2 + LEFT JOIN tmp1 USING (fs1) + WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 0; + +optimize table t1 final; + +WITH +tmp1 AS +( + SELECT + CAST(s1, 'FixedString(10)') AS fs1, + s2 AS sector, + s3 + FROM t1 + WHERE (s3 != 'test') +) + SELECT + fs1 + FROM t2 + LEFT JOIN tmp1 USING (fs1) + WHERE (fs1 IN ('test')); diff --git a/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference b/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference new file mode 100644 index 00000000000..10ce589000d --- /dev/null +++ b/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference @@ -0,0 +1,27 @@ +5 (4230072075578472911,4230072075578472911) 71789584853496063 +2 (4401188181514187637,4401188181514187637) 878466845199253299 +4 (4940826638032106783,4940826638032106783) 3675164899122807807 +6 (10957420562507184961,10957420562507184961) 3732623117916254211 +0 (797076400500506358,797076400500506358) 3746094338409299772 +7 (10843611042193511775,10843611042193511775) 4607251742847087615 +3 (12588286986351526898,12588286986351526898) 13889114719560662796 +8 (452995860660674674,452995860660674674) 17365664920787500812 +9 (12206106972241516904,12206106972241516904) 17567684527097330880 +1 (14558425114501132193,14558425114501132193) 18445898820068822019 +3 255 255 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 +1 +2 +3 +4 +5 +6 diff --git a/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql b/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql new file mode 100644 index 00000000000..c32d7524492 --- /dev/null +++ b/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql @@ -0,0 +1,41 @@ +SET allow_experimental_analyzer=1; + +SELECT *, ngramMinHash(*) AS minhash, mortonEncode(untuple(ngramMinHash(*))) AS z +FROM (SELECT toString(number) FROM numbers(10)) +ORDER BY z LIMIT 100; + +CREATE TABLE test ( + idx UInt64, + coverage Array(UInt64), + test_name String +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO test VALUES (10, [0,1,2,3], 'xx'), (20, [3,4,5,6], 'xxx'), (90, [3,4,5,6,9], 'xxxx'); + +WITH + 4096 AS w, 4096 AS h, w * h AS pixels, + arrayJoin(coverage) AS num, + num DIV (32768 * 32768 DIV pixels) AS idx, + mortonDecode(2, idx) AS coord, + 255 AS b, + least(255, uniq(test_name)) AS r, + 255 * uniq(test_name) / (max(uniq(test_name)) OVER ()) AS g +SELECT r::UInt8, g::UInt8, b::UInt8 +FROM test +GROUP BY coord +ORDER BY coord.2 * w + coord.1 +WITH FILL FROM 0 TO 10; + + +CREATE TABLE seq ( + number UInt64 +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO seq VALUES (0), (6), (7); + +WITH (Select min(number), max(number) from seq) as range Select * from numbers(range.1, range.2); + diff --git a/tests/queries/0_stateless/03199_unbin_buffer_overflow.reference b/tests/queries/0_stateless/03199_unbin_buffer_overflow.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh b/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh new file mode 100755 index 00000000000..337debebb14 --- /dev/null +++ b/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +# check for buffer overflow in unbin (due to not enough memory preallocate for output buffer) +# we iterate over all remainders of input string length modulo word_size and check that no assertions are triggered + +word_size=8 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unbin(toFixedString(materialize('$str'), $i)) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=8 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unbin(materialize('$str')) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=2 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unhex(toFixedString(materialize('$str'), $i)) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=2 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unhex(materialize('$str')) WITH ROLLUP WITH TOTALS FORMAT NULL" +done diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference new file mode 100644 index 00000000000..6d2c1334d6e --- /dev/null +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference @@ -0,0 +1,10 @@ +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql new file mode 100644 index 00000000000..a01a595dbb5 --- /dev/null +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql @@ -0,0 +1,7 @@ +set allow_experimental_dynamic_type=1; +create table test (d Dynamic) engine=Memory; +insert into table test select * from numbers(5); +alter table test modify column d Dynamic(max_types=1); +select d.UInt64 from test settings allow_experimental_analyzer=1; +select d.UInt64 from test settings allow_experimental_analyzer=0; + diff --git a/tests/queries/0_stateless/00967_ubsan_bit_test.reference b/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.reference similarity index 100% rename from tests/queries/0_stateless/00967_ubsan_bit_test.reference rename to tests/queries/0_stateless/03200_subcolumns_join_use_nulls.reference diff --git a/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.sql b/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.sql new file mode 100644 index 00000000000..2dd0a37657d --- /dev/null +++ b/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t_subcolumns_join; + +CREATE TABLE t_subcolumns_join (id UInt64) ENGINE=MergeTree ORDER BY tuple(); + +INSERT INTO t_subcolumns_join SELECT number as number FROM numbers(10000); + +SELECT + count() +FROM (SELECT number FROM numbers(10)) as tbl LEFT JOIN t_subcolumns_join ON number = id +WHERE id is null +SETTINGS allow_experimental_analyzer = 1, optimize_functions_to_subcolumns = 1, join_use_nulls = 1; + +DROP TABLE t_subcolumns_join; diff --git a/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.reference b/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.reference new file mode 100644 index 00000000000..912bff45da5 --- /dev/null +++ b/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.reference @@ -0,0 +1,11 @@ +str_array Array(String) +1318 +5779 +1715 +6422 +5875 +1887 +3763 +4245 +4270 +758 diff --git a/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.sh b/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.sh new file mode 100755 index 00000000000..dcecd7b3bea --- /dev/null +++ b/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +DATA_DIR=$CUR_DIR/data_avro + +# See https://github.com/ClickHouse/ClickHouse/issues/60438 +$CLICKHOUSE_LOCAL -q "DESC file('$DATA_DIR/negative_block_size_arrays.avro')" +$CLICKHOUSE_LOCAL -q "SELECT arraySum(arrayMap(x -> length(x), str_array)) AS res FROM file('$DATA_DIR/negative_block_size_arrays.avro')" diff --git a/tests/queries/0_stateless/03201_local_named_collections.reference b/tests/queries/0_stateless/03201_local_named_collections.reference new file mode 100644 index 00000000000..af5626b4a11 --- /dev/null +++ b/tests/queries/0_stateless/03201_local_named_collections.reference @@ -0,0 +1 @@ +Hello, world! diff --git a/tests/queries/0_stateless/03201_local_named_collections.sh b/tests/queries/0_stateless/03201_local_named_collections.sh new file mode 100755 index 00000000000..809b4b52f41 --- /dev/null +++ b/tests/queries/0_stateless/03201_local_named_collections.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " +DROP TABLE IF EXISTS test; +CREATE TABLE test (s String) ORDER BY (); +INSERT INTO test VALUES ('Hello, world!'); +" + +${CLICKHOUSE_LOCAL} --multiquery " +CREATE NAMED COLLECTION mydb AS host = '${CLICKHOUSE_HOST}', port = ${CLICKHOUSE_PORT_TCP}, user = 'default', password = '', db = '${CLICKHOUSE_DATABASE}'; +SELECT * FROM remote(mydb, table = 'test'); +" 2>&1 | grep --text -F -v "ASan doesn't fully support makecontext/swapcontext functions" + +${CLICKHOUSE_CLIENT} --multiquery " +DROP TABLE test; +" diff --git a/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.reference b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.reference new file mode 100644 index 00000000000..62f5eb45106 --- /dev/null +++ b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.reference @@ -0,0 +1,24 @@ +QUERY id: 0 + PROJECTION COLUMNS + (sumIf(toInt64(1), 1)) Tuple(Int64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: tuple, function_type: ordinary, result_type: Tuple(Int64) + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: sumIf, function_type: aggregate, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: Int64_1, constant_value_type: Int64 + EXPRESSION + FUNCTION id: 7, function_name: toInt64, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 12, nodes: 1 + CONSTANT id: 13, constant_value: UInt64_100, constant_value_type: UInt8 + SETTINGS optimize_rewrite_sum_if_to_count_if=1 diff --git a/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.sql b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.sql new file mode 100644 index 00000000000..24369fd6497 --- /dev/null +++ b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.sql @@ -0,0 +1,2 @@ +SET allow_experimental_analyzer = 1; +EXPLAIN QUERY TREE SELECT tuple(sumIf(toInt64(1), 1)) FROM numbers(100) settings optimize_rewrite_sum_if_to_count_if=1; diff --git a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference new file mode 100644 index 00000000000..8565fe3d0fa --- /dev/null +++ b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference @@ -0,0 +1,402 @@ +Memory +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 [] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 [] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 [] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] [1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 +24 +26 +27 +29 +30 +32 +33 +35 +MergeTree compact +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 [] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 [] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 [] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] [1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 +24 +26 +27 +29 +30 +32 +33 +35 +MergeTree wide +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 [] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 [] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 [] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] [1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 +24 +26 +27 +29 +30 +32 +33 +35 diff --git a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh new file mode 100755 index 00000000000..8231691e184 --- /dev/null +++ b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1" + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(36)" + $CH_CLIENT -q "select v, v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" + $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" + $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id" + $CH_CLIENT -q "select id from test where v.UInt64 is null order by id" + + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(1000000) settings min_insert_block_size_rows=100000" + $CH_CLIENT -q "select v, v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" + $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" + $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id format Null" + $CH_CLIENT -q "select id from test where v.UInt64 is null order by id format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference new file mode 100644 index 00000000000..8740726c7ef --- /dev/null +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference @@ -0,0 +1,57 @@ +Memory +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 +MergeTree compact +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 +MergeTree wide +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh new file mode 100755 index 00000000000..aa06e48376c --- /dev/null +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, number from numbers(10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(10, 10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(20, 10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, NULL from numbers(30, 10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(40, 40) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(10, 10) settings min_insert_block_size_rows=50000" + + $CH_CLIENT -q "select distinct dynamicType(d) as type from test order by type" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'UInt64'" + $CH_CLIENT -q "select count() from test where d.UInt64 is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'String'" + $CH_CLIENT -q "select count() from test where d.String is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Date'" + $CH_CLIENT -q "select count() from test where d.Date is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Variant(String, UInt64))\`)" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Array(Dynamic))\`)" + $CH_CLIENT -q "select count() from test where d is NULL" + $CH_CLIENT -q "select count() from test where not empty(d.\`Tuple(a Array(Dynamic))\`.a.String)" + + $CH_CLIENT -q "select d, d.UInt64.null, d.String.null, d.\`Array(Variant(String, UInt64))\`.null from test format Null" + $CH_CLIENT -q "select d.UInt64.null, d.String.null, d.\`Array(Variant(String, UInt64))\`.null from test format Null" + $CH_CLIENT -q "select d.Int8.null, d.Date.null, d.\`Array(String)\`.null from test format Null" + $CH_CLIENT -q "select d, d.UInt64.null, d.Date.null, d.\`Array(Variant(String, UInt64))\`.null, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" + $CH_CLIENT -q "select d.UInt64.null, d.Date.null, d.\`Array(Variant(String, UInt64))\`.null, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null, d.\`Array(Variant(String, UInt64))\`.String.null from test format Null" + $CH_CLIENT -q "select d, d.\`Tuple(a UInt64, b String)\`.a, d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64.null, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" + $CH_CLIENT -q "select d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64.null, d.\`Array(Dynamic)\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" + $CH_CLIENT -q "select d.\`Array(Array(Dynamic))\`.size1, d.\`Array(Array(Dynamic))\`.UInt64.null, d.\`Array(Array(Dynamic))\`.\`Map(String, Tuple(a UInt64))\`.values.a from test format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03203_grpc_protocol.reference b/tests/queries/0_stateless/03203_grpc_protocol.reference new file mode 100644 index 00000000000..9766475a418 --- /dev/null +++ b/tests/queries/0_stateless/03203_grpc_protocol.reference @@ -0,0 +1 @@ +ok diff --git a/tests/queries/0_stateless/03203_grpc_protocol.sh b/tests/queries/0_stateless/03203_grpc_protocol.sh new file mode 100755 index 00000000000..d51d6382f67 --- /dev/null +++ b/tests/queries/0_stateless/03203_grpc_protocol.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: In fasttest, ENABLE_LIBRARIES=0, so the grpc library is not built + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +if [[ -z "$CLICKHOUSE_GRPC_CLIENT" ]]; then + CLICKHOUSE_GRPC_CLIENT="$CURDIR/../../../utils/grpc-client/clickhouse-grpc-client.py" +fi + +# Simple test. +$CLICKHOUSE_GRPC_CLIENT --query "SELECT 'ok'" diff --git a/tests/queries/0_stateless/03203_multiif_and_where_2_conditions_old_analyzer_bug.reference b/tests/queries/0_stateless/03203_multiif_and_where_2_conditions_old_analyzer_bug.reference new file mode 100644 index 00000000000..5512e59cc73 --- /dev/null +++ b/tests/queries/0_stateless/03203_multiif_and_where_2_conditions_old_analyzer_bug.reference @@ -0,0 +1,6 @@ +this query used to be broken in old analyser: +c1 c2 yes true +this query worked: +yes true +experimental analyzer: +c1 c2 yes true diff --git a/tests/queries/0_stateless/03203_multiif_and_where_2_conditions_old_analyzer_bug.sql b/tests/queries/0_stateless/03203_multiif_and_where_2_conditions_old_analyzer_bug.sql new file mode 100644 index 00000000000..c97333f7df4 --- /dev/null +++ b/tests/queries/0_stateless/03203_multiif_and_where_2_conditions_old_analyzer_bug.sql @@ -0,0 +1,35 @@ +DROP TABLE IF EXISTS bugcheck1; + +CREATE TABLE bugcheck1 +ENGINE = MergeTree +ORDER BY tuple() +AS SELECT + 'c1' as column_a, + 'c2' as column_b; + +select 'this query used to be broken in old analyser:'; +SELECT *, + multiIf(column_b IN (SELECT 'c2' as someproduct), 'yes', 'no') AS condition_1, + multiIf(column_b = 'c2', 'true', 'false') AS condition_2 +FROM (SELECT column_a, column_b FROM bugcheck1) +WHERE (condition_1 IN ('yes')) AND (condition_2 in ('true')) +settings allow_experimental_analyzer=0; + +select 'this query worked:'; + +SELECT + multiIf(column_b IN (SELECT 'c2' as someproduct), 'yes', 'no') AS condition_1, + multiIf(column_b = 'c2', 'true', 'false') AS condition_2 +FROM (SELECT column_a, column_b FROM bugcheck1) +WHERE (condition_1 IN ('yes')) AND (condition_2 in ('true')) +settings allow_experimental_analyzer=0; + +select 'experimental analyzer:'; +SELECT *, + multiIf(column_b IN (SELECT 'c2' as someproduct), 'yes', 'no') AS condition_1, + multiIf(column_b = 'c2', 'true', 'false') AS condition_2 +FROM (SELECT column_a, column_b FROM bugcheck1) +WHERE (condition_1 IN ('yes')) AND (condition_2 in ('true')) +settings allow_experimental_analyzer=1; + +DROP TABLE bugcheck1; diff --git a/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.reference b/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.sql b/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.sql new file mode 100644 index 00000000000..0a07ce48268 --- /dev/null +++ b/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS t_c3oollc8r; +CREATE TABLE t_c3oollc8r (c_k37 Int32, c_y String, c_bou Int32, c_g1 Int32, c_lfntfzg Int32, c_kntw50q Int32) ENGINE = MergeTree ORDER BY (); + +SELECT ( + SELECT c_k37 + FROM t_c3oollc8r + ) > c_lfntfzg +FROM remote('127.0.0.{1,2}', currentDatabase(), t_c3oollc8r); + +DROP TABLE t_c3oollc8r; diff --git a/tests/queries/0_stateless/data_avro/negative_block_size_arrays.avro b/tests/queries/0_stateless/data_avro/negative_block_size_arrays.avro new file mode 100644 index 00000000000..ec785a885dc Binary files /dev/null and b/tests/queries/0_stateless/data_avro/negative_block_size_arrays.avro differ diff --git a/tests/queries/0_stateless/data_orc/test_reader_time_zone.snappy.orc b/tests/queries/0_stateless/data_orc/test_reader_time_zone.snappy.orc new file mode 100644 index 00000000000..ab1b785dbbf Binary files /dev/null and b/tests/queries/0_stateless/data_orc/test_reader_time_zone.snappy.orc differ diff --git a/tests/queries/1_stateful/00166_explain_estimate.reference b/tests/queries/1_stateful/00166_explain_estimate.reference index 71ddd681581..85ecd0b9a71 100644 --- a/tests/queries/1_stateful/00166_explain_estimate.reference +++ b/tests/queries/1_stateful/00166_explain_estimate.reference @@ -1,5 +1,5 @@ test hits 1 57344 7 -test hits 1 8839168 1079 -test hits 1 835584 102 +test hits 1 8832938 1079 +test hits 1 829354 102 test hits 1 8003584 977 test hits 2 581632 71 diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index 614bfcece8f..ef2d89f0218 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -54,9 +54,17 @@ export CLICKHOUSE_OBFUSCATOR=${CLICKHOUSE_OBFUSCATOR:="${CLICKHOUSE_BINARY}-obfu export CLICKHOUSE_COMPRESSOR=${CLICKHOUSE_COMPRESSOR:="${CLICKHOUSE_BINARY}-compressor"} export CLICKHOUSE_GIT_IMPORT=${CLICKHOUSE_GIT_IMPORT="${CLICKHOUSE_BINARY}-git-import"} +export CLICKHOUSE_CONFIG_DIR=${CLICKHOUSE_CONFIG_DIR:="/etc/clickhouse-server"} export CLICKHOUSE_CONFIG=${CLICKHOUSE_CONFIG:="/etc/clickhouse-server/config.xml"} export CLICKHOUSE_CONFIG_CLIENT=${CLICKHOUSE_CONFIG_CLIENT:="/etc/clickhouse-client/config.xml"} +export CLICKHOUSE_USER_FILES=${CLICKHOUSE_USER_FILES:="/var/lib/clickhouse/user_files"} +export CLICKHOUSE_USER_FILES_UNIQUE=${CLICKHOUSE_USER_FILES_UNIQUE:="${CLICKHOUSE_USER_FILES}/${CLICKHOUSE_TEST_UNIQUE_NAME}"} +# synonym +export USER_FILES_PATH=$CLICKHOUSE_USER_FILES + +export CLICKHOUSE_SCHEMA_FILES=${CLICKHOUSE_SCHEMA_FILES:="/var/lib/clickhouse/format_schemas"} + [ -x "${CLICKHOUSE_BINARY}-extract-from-config" ] && CLICKHOUSE_EXTRACT_CONFIG=${CLICKHOUSE_EXTRACT_CONFIG:="$CLICKHOUSE_BINARY-extract-from-config --config=$CLICKHOUSE_CONFIG"} [ -x "${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_EXTRACT_CONFIG=${CLICKHOUSE_EXTRACT_CONFIG:="$CLICKHOUSE_BINARY extract-from-config --config=$CLICKHOUSE_CONFIG"} export CLICKHOUSE_EXTRACT_CONFIG=${CLICKHOUSE_EXTRACT_CONFIG:="$CLICKHOUSE_BINARY-extract-from-config --config=$CLICKHOUSE_CONFIG"} diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 229eccefa48..7de065cc589 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2758 +personal_ws-1.1 en 2942 AArch ACLs ALTERs @@ -48,7 +48,6 @@ AutoML Autocompletion AvroConfluent BIGINT -bigrams BIGSERIAL BORO BSON @@ -223,7 +222,6 @@ DatabaseOrdinaryThreadsActive DateTime DateTimes DbCL -deallocated Decrypted Deduplicate Deduplication @@ -295,7 +293,6 @@ FilesystemMainPathUsedBytes FilesystemMainPathUsedINodes FixedString FlameGraph -flameGraph Flink ForEach FreeBSD @@ -452,7 +449,7 @@ Kahan Kaser KeeperAliveConnections KeeperMap -KeeperOutstandingRequets +KeeperOutstandingRequests Kerberos Khanna KittenHouse @@ -470,7 +467,7 @@ LOCALTIME LOCALTIMESTAMP LONGLONG LOONGARCH -Lemir +Lemire Levenshtein Liao LibFuzzer @@ -995,6 +992,8 @@ UPDATEs URIs URL URL's +URLDecode +URLEncode URLHash URLHierarchy URLPathHierarchy @@ -1012,13 +1011,10 @@ UncompressedCacheBytes UncompressedCacheCells UnidirectionalEdgeIsValid UniqThetaSketch -unigrams Updatable Uppercased Uptime Uptrace -URLDecode -URLEncode UserID Util VARCHAR @@ -1102,6 +1098,8 @@ aggregatefunction aggregatingmergetree aggregatio aggretate +aggthrow +aggThrow aiochclient allocator alphaTokens @@ -1224,6 +1222,7 @@ basename bcrypt benchmarking bfloat +bigrams binlog bitAnd bitCount @@ -1320,6 +1319,12 @@ cfg cgroup cgroups chadmin +changeDay +changeHour +changeMinute +changeMonth +changeSecond +changeYear changelog changelogs charset @@ -1473,6 +1478,7 @@ dbeaver dbgen dbms ddl +deallocated deallocation deallocations debian @@ -1512,11 +1518,11 @@ deserializing destructor destructors detectCharset -detectTonality detectLanguage detectLanguageMixed detectLanguageUnknown detectProgrammingLanguage +detectTonality determinator deterministically dictGet @@ -1532,8 +1538,8 @@ dictIsIn disableProtocols disjunction disjunctions -displaySecretsInShowAndSelect displayName +displaySecretsInShowAndSelect distro divideDecimal dmesg @@ -1583,11 +1589,11 @@ evalMLMethod exFAT expiryMsec exponentialMovingAverage -exponentialmovingaverage exponentialTimeDecayedAvg exponentialTimeDecayedCount exponentialTimeDecayedMax exponentialTimeDecayedSum +exponentialmovingaverage expr exprN extendedVerification @@ -1624,6 +1630,7 @@ firstSignificantSubdomainCustom firstSignificantSubdomainCustomRFC firstSignificantSubdomainRFC fixedstring +flameGraph flamegraph flatbuffers flattenTuple @@ -1657,6 +1664,7 @@ fsync func fuzzBits fuzzJSON +fuzzQuery fuzzer fuzzers gRPC @@ -1806,8 +1814,8 @@ incrementing indexHint indexOf infi -infty inflight +infty initcap initcapUTF initialQueryID @@ -1898,11 +1906,13 @@ kurtosis kurtpop kurtsamp laion +lagInFrame lang laravel largestTriangleThreeBuckets latencies ldap +leadInFrame leftPad leftPadUTF leftUTF @@ -1955,9 +1965,9 @@ loghouse london lookups loongarch -lowcardinality lowCardinalityIndices lowCardinalityKeys +lowcardinality lowerUTF lowercased lttb @@ -1980,6 +1990,8 @@ mapExtractKeyLike mapFilter mapFromArrays mapKeys +mapPartialReverseSort +mapPartialSort mapPopulateSeries mapReverseSort mapSort @@ -1996,6 +2008,7 @@ maxMap maxintersections maxintersectionsposition maxmap +minMappedArrays maxmind mdadm meanZTest @@ -2013,6 +2026,7 @@ metrica metroHash mfedotov minMap +minMappedArrays minSampleSizeContinuous minSampleSizeConversion mindsdb @@ -2120,8 +2134,10 @@ noaa nonNegativeDerivative noop normalizeQuery +normalizeQueryKeepNames normalizeUTF normalizedQueryHash +normalizedQueryHashKeepNames notEmpty notEquals notILike @@ -2184,6 +2200,8 @@ parseReadableSizeOrZero parseTimeDelta parseable parsers +partitionId +partitionID pathFull pclmulqdq pcre @@ -2265,9 +2283,9 @@ proleptic prometheus proportionsZTest proto -protocol protobuf protobufsingle +protocol proxied pseudorandom pseudorandomize @@ -2759,6 +2777,9 @@ topLevelDomain topLevelDomainRFC topk topkweighted +transactionID +transactionLatestSnapshot +transactionOldestSnapshot transactional transactionally translateUTF @@ -2792,6 +2813,7 @@ tupleModulo tupleModuloByNumber tupleMultiply tupleMultiplyByNumber +tupleNames tupleNegate tuplePlus tupleToNameValuePairs @@ -2812,6 +2834,7 @@ unescaping unhex unicode unidimensional +unigrams unintuitive uniq uniqCombined diff --git a/utils/grpc-client/generate_pb2.py b/utils/grpc-client/generate_pb2.py new file mode 100755 index 00000000000..95a39023ed7 --- /dev/null +++ b/utils/grpc-client/generate_pb2.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 + +# This is a helper utility. +# It generates files in the "pb2" folder using the protocol buffer compiler. +# This script must be called manually after any change pf "clickhouse_grpc.proto" + +import grpc_tools # pip3 install grpcio-tools + +import os, shutil, subprocess + + +# Settings. +script_path = os.path.realpath(__file__) +script_name = os.path.basename(script_path) +script_dir = os.path.dirname(script_path) +root_dir = os.path.abspath(os.path.join(script_dir, "../..")) + +grpc_proto_dir = os.path.abspath(os.path.join(root_dir, "src/Server/grpc_protos")) +grpc_proto_filename = "clickhouse_grpc.proto" + +# Files in the "pb2" folder which will be generated by this script. +pb2_filenames = ["clickhouse_grpc_pb2.py", "clickhouse_grpc_pb2_grpc.py"] +pb2_dir = os.path.join(script_dir, "pb2") + + +# Processes the protobuf schema with the protocol buffer compiler and generates the "pb2" folder. +def generate_pb2(): + print(f"Generating files:") + for pb2_filename in pb2_filenames: + print(os.path.join(pb2_dir, pb2_filename)) + + os.makedirs(pb2_dir, exist_ok=True) + + cmd = [ + "python3", + "-m", + "grpc_tools.protoc", + "-I" + grpc_proto_dir, + "--python_out=" + pb2_dir, + "--grpc_python_out=" + pb2_dir, + os.path.join(grpc_proto_dir, grpc_proto_filename), + ] + subprocess.run(cmd) + + for pb2_filename in pb2_filenames: + assert os.path.exists(os.path.join(pb2_dir, pb2_filename)) + print("Done! (generate_pb2)") + + +# MAIN +if __name__ == "__main__": + generate_pb2() diff --git a/utils/grpc-client/pb2/generate.py b/utils/grpc-client/pb2/generate.py deleted file mode 100755 index 2f4b3bf5af7..00000000000 --- a/utils/grpc-client/pb2/generate.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python3 - -import grpc_tools # pip3 install grpcio-tools - -import os -import subprocess - - -script_dir = os.path.dirname(os.path.realpath(__file__)) -dest_dir = script_dir -src_dir = os.path.abspath(os.path.join(script_dir, "../../../src/Server/grpc_protos")) -src_filename = "clickhouse_grpc.proto" - - -def generate(): - cmd = [ - "python3", - "-m", - "grpc_tools.protoc", - "-I" + src_dir, - "--python_out=" + dest_dir, - "--grpc_python_out=" + dest_dir, - os.path.join(src_dir, src_filename), - ] - subprocess.run(cmd) - - -if __name__ == "__main__": - generate() diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 5ae4c7a0b1c..587e015b340 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1311,9 +1311,9 @@ void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & pa while (!children_span.empty()) { Coordination::Requests ops; - for (size_t i = 0; i < 1000 && !children.empty(); ++i) + for (size_t i = 0; i < 1000 && !children_span.empty(); ++i) { - removeRecursive(zookeeper, fs::path(path) / children.back()); + removeRecursive(zookeeper, fs::path(path) / children_span.back()); ops.emplace_back(zkutil::makeRemoveRequest(fs::path(path) / children_span.back(), -1)); children_span = children_span.subspan(0, children_span.size() - 1); } diff --git a/utils/keeper-bench/example.yaml b/utils/keeper-bench/example.yaml index e800e923482..c3a62a01eac 100644 --- a/utils/keeper-bench/example.yaml +++ b/utils/keeper-bench/example.yaml @@ -18,45 +18,46 @@ connections: host: "localhost:9181" -generator: - setup: +setup: + node: + name: "test3" + node: + name: "test_create" + node: + name: "test4" + node: + name: "test" + data: "somedata" node: - name: "test3" + repeat: 4 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 node: - name: "test_create" - node: - name: "test4" - node: - name: "test" - data: "somedata" - node: - repeat: 4 - name: - random_string: - size: 15 - data: - random_string: - size: - min_value: 10 - max_value: 20 + repeat: 2 node: repeat: 2 - node: - repeat: 2 - name: - random_string: - size: 12 name: random_string: - size: 15 - data: - random_string: - size: - min_value: 10 - max_value: 20 - node: - name: "test2" - data: "somedata" + size: 12 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 + node: + name: "test2" + data: "somedata" + +generator: requests: create: path: "/test_create" diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 25be8549dd6..db6dac52190 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8112ed9083b..271065a78fb 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 @@ -6,6 +7,7 @@ v24.5.1.1763-stable 2024-06-01 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.5.46-lts 2024-07-03 v24.3.4.147-lts 2024-06-13 v24.3.3.102-lts 2024-05-01 v24.3.2.23-lts 2024-04-03