diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index f18a83e1b97..2853adff48a 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -305,7 +305,7 @@ jobs:
       runner_type: style-checker-aarch64
       data: ${{ needs.RunConfig.outputs.data }}
   MarkReleaseReady:
-    if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
+    if: ${{ !failure() && !cancelled() }}
     needs:
       - BuilderBinDarwin
       - BuilderBinDarwinAarch64
@@ -313,9 +313,25 @@ jobs:
       - BuilderDebAarch64
     runs-on: [self-hosted, style-checker]
     steps:
+      - name: Debug
+        run: |
+          echo needs with different filters
+          cat << 'EOF'
+          ${{ toJSON(needs) }}
+          ${{ toJSON(needs.*.result) }}
+          no failures ${{ !contains(needs.*.result, 'failure') }}
+          no skips ${{ !contains(needs.*.result, 'skipped') }}
+          no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
+          EOF
+      - name: Not ready
+        # fail the job to be able to restart it
+        if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }}
+        run: exit 1
       - name: Check out repository code
+        if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
         uses: ClickHouse/checkout@v1
       - name: Mark Commit Release Ready
+        if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 mark_release_ready.py
diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
index bdb045a70a6..9e95b3d3d8f 100644
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@@ -206,7 +206,7 @@ jobs:
       runner_type: style-checker-aarch64
       data: ${{ needs.RunConfig.outputs.data }}
   MarkReleaseReady:
-    if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
+    if: ${{ !failure() && !cancelled() }}
     needs:
       - BuilderBinDarwin
       - BuilderBinDarwinAarch64
@@ -214,9 +214,25 @@ jobs:
      - BuilderDebAarch64
     runs-on: [self-hosted, style-checker-aarch64]
     steps:
+      - name: Debug
+        run: |
+          echo needs with different filters
+          cat << 'EOF'
+          ${{ toJSON(needs) }}
+          ${{ toJSON(needs.*.result) }}
+          no failures ${{ !contains(needs.*.result, 'failure') }}
+          no skips ${{ !contains(needs.*.result, 'skipped') }}
+          no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
+          EOF
+      - name: Not ready
+        # fail the job to be able to restart it
+        if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }}
+        run: exit 1
       - name: Check out repository code
+        if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
         uses: ClickHouse/checkout@v1
       - name: Mark Commit Release Ready
+        if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 mark_release_ready.py
diff --git a/.github/workflows/reusable_build.yml b/.github/workflows/reusable_build.yml
index 80d78d93e1b..d2fe6f5dbe7 100644
--- a/.github/workflows/reusable_build.yml
+++ b/.github/workflows/reusable_build.yml
@@ -43,7 +43,8 @@ jobs:
     runs-on: [self-hosted, '${{inputs.runner_type}}']
     steps:
       - name: Check out repository code
-        uses: ClickHouse/checkout@v1
+        # WIP: temporarily try a commit with limited parallelization of checkout
+        uses: ClickHouse/checkout@0be3f7b3098bae494d3ef5d29d2e0676fb606232
         with:
           clear-repository: true
           ref: ${{ fromJson(inputs.data).git_ref }}
diff --git a/.gitignore b/.gitignore
index 5341f23a94f..1ea8f83dcc2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -165,7 +165,7 @@ tests/queries/0_stateless/*.expect.history
 tests/integration/**/_gen
 
 # rust
-/rust/**/target
+/rust/**/target*
 # It is autogenerated from *.in
 /rust/**/.cargo/config.toml
 /rust/**/vendor
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3bd179a799c..b55e9810361 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -319,7 +319,8 @@ if (COMPILER_CLANG)
     endif()
 endif ()
 
-set (COMPILER_FLAGS "${COMPILER_FLAGS}")
+# Disable floating-point expression contraction in order to get consistent floating point calculation results across platforms
+set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffp-contract=off")
 
 # Our built-in unwinder only supports DWARF version up to 4.
 set (DEBUG_INFO_FLAGS "-g")
diff --git a/README.md b/README.md
index 9ada350d173..e00ce42a60b 100644
--- a/README.md
+++ b/README.md
@@ -31,15 +31,30 @@ curl https://clickhouse.com/ | sh
 * [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements.
 * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
 
+## Monthly Release & Community Call
+
+Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know.
+
+* [v24.3 Community Call](https://clickhouse.com/company/events/v24-3-community-release-call) - Mar 26
+* [v24.4 Community Call](https://clickhouse.com/company/events/v24-4-community-release-call) - Apr 30
+
 ## Upcoming Events
 
-Keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com.
+Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.
+
+* [ClickHouse Meetup in Bellevue](https://www.meetup.com/clickhouse-seattle-user-group/events/298650371/) - Mar 11
+* [ClickHouse Meetup at Ramp's Offices in NYC](https://www.meetup.com/clickhouse-new-york-user-group/events/298640542/) - Mar 19
+* [ClickHouse Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/299479750/) - Mar 20
+* [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/298997115/) - Mar 21
+* [ClickHouse Meetup in Bengaluru](https://www.meetup.com/clickhouse-bangalore-user-group/events/299479850/) - Mar 23
+* [ClickHouse Meetup in Zurich](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/299628922/) - Apr 16
+* [ClickHouse Meetup in Copenhagen](https://www.meetup.com/clickhouse-denmark-meetup-group/events/299629133/) - Apr 23
+* [ClickHouse Meetup in Dubai](https://www.meetup.com/clickhouse-dubai-meetup-group/events/299629189/) - May 28
+
 ## Recent Recordings
 * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
-* **Recording available**: [**v24.1 Release Webinar**](https://www.youtube.com/watch?v=pBF9g0wGAGs) All the features of 24.1, one convenient video! Watch it now!
-* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)
-
+* **Recording available**: [**v24.2 Release Call**](https://www.youtube.com/watch?v=iN2y-TK8f3A) All the features of 24.2, one convenient video! Watch it now!
 
 ## Interested in joining ClickHouse and making it your full-time job?
diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt
index 548ba01d86a..610877eae73 100644
--- a/base/base/CMakeLists.txt
+++ b/base/base/CMakeLists.txt
@@ -13,6 +13,7 @@ set (SRCS
     cgroupsv2.cpp
     coverage.cpp
     demangle.cpp
+    Decimal.cpp
     getAvailableMemoryAmount.cpp
     getFQDNOrHostName.cpp
     getMemoryAmount.cpp
diff --git a/base/base/Decimal.cpp b/base/base/Decimal.cpp
new file mode 100644
index 00000000000..7e65c0eb8d1
--- /dev/null
+++ b/base/base/Decimal.cpp
@@ -0,0 +1,87 @@
+#include <base/Decimal.h>
+#include <base/extended_types.h>
+
+namespace DB
+{
+
+/// Explicit template instantiations.
+
+#define FOR_EACH_UNDERLYING_DECIMAL_TYPE(M) \
+    M(Int32) \
+    M(Int64) \
+    M(Int128) \
+    M(Int256)
+
+#define FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(M, X) \
+    M(Int32, X) \
+    M(Int64, X) \
+    M(Int128, X) \
+    M(Int256, X)
+
+template <typename T> const Decimal<T> & Decimal<T>::operator += (const T & x) { value += x; return *this; }
+template <typename T> const Decimal<T> & Decimal<T>::operator -= (const T & x) { value -= x; return *this; }
+template <typename T> const Decimal<T> & Decimal<T>::operator *= (const T & x) { value *= x; return *this; }
+template <typename T> const Decimal<T> & Decimal<T>::operator /= (const T & x) { value /= x; return *this; }
+template <typename T> const Decimal<T> & Decimal<T>::operator %= (const T & x) { value %= x; return *this; }
+
+template <typename T> void NO_SANITIZE_UNDEFINED Decimal<T>::addOverflow(const T & x) { value += x; }
+
+/// Maybe this explicit instantiation affects performance since operators cannot be inlined.
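+/// (That is the trade-off being made here: the operators are compiled once in this
+/// translation unit instead of being re-instantiated in every file that includes
+/// Decimal.h, which saves build time at the cost of cross-TU inlining.)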
+
+template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator += (const Decimal<U> & x) { value += static_cast<T>(x.value); return *this; }
+template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator -= (const Decimal<U> & x) { value -= static_cast<T>(x.value); return *this; }
+template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator *= (const Decimal<U> & x) { value *= static_cast<T>(x.value); return *this; }
+template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator /= (const Decimal<U> & x) { value /= static_cast<T>(x.value); return *this; }
+template <typename T> template <typename U> const Decimal<T> & Decimal<T>::operator %= (const Decimal<U> & x) { value %= static_cast<T>(x.value); return *this; }
+
+#define DISPATCH(TYPE_T, TYPE_U) \
+    template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator += (const Decimal<TYPE_U> & x); \
+    template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator -= (const Decimal<TYPE_U> & x); \
+    template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator *= (const Decimal<TYPE_U> & x); \
+    template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator /= (const Decimal<TYPE_U> & x); \
+    template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator %= (const Decimal<TYPE_U> & x);
+#define INVOKE(X) FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(DISPATCH, X)
+FOR_EACH_UNDERLYING_DECIMAL_TYPE(INVOKE);
+#undef INVOKE
+#undef DISPATCH
+
+#define DISPATCH(TYPE) template struct Decimal<TYPE>;
+FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
+#undef DISPATCH
+
+template <typename T> bool operator< (const Decimal<T> & x, const Decimal<T> & y) { return x.value < y.value; }
+template <typename T> bool operator> (const Decimal<T> & x, const Decimal<T> & y) { return x.value > y.value; }
+template <typename T> bool operator<= (const Decimal<T> & x, const Decimal<T> & y) { return x.value <= y.value; }
+template <typename T> bool operator>= (const Decimal<T> & x, const Decimal<T> & y) { return x.value >= y.value; }
+template <typename T> bool operator== (const Decimal<T> & x, const Decimal<T> & y) { return x.value == y.value; }
+template <typename T> bool operator!= (const Decimal<T> & x, const Decimal<T> & y) { return x.value != y.value; }
+
+#define DISPATCH(TYPE) \
+template bool operator< (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+template bool operator> (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+template bool operator<= (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+template bool operator>= (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+template bool operator== (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+template bool operator!= (const Decimal<TYPE> & x, const Decimal<TYPE> & y);
+FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
+#undef DISPATCH
+
+
+template <typename T> Decimal<T> operator+ (const Decimal<T> & x, const Decimal<T> & y) { return x.value + y.value; }
+template <typename T> Decimal<T> operator- (const Decimal<T> & x, const Decimal<T> & y) { return x.value - y.value; }
+template <typename T> Decimal<T> operator* (const Decimal<T> & x, const Decimal<T> & y) { return x.value * y.value; }
+template <typename T> Decimal<T> operator/ (const Decimal<T> & x, const Decimal<T> & y) { return x.value / y.value; }
+template <typename T> Decimal<T> operator- (const Decimal<T> & x) { return -x.value; }
+
+#define DISPATCH(TYPE) \
+template Decimal<TYPE> operator+ (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+template Decimal<TYPE> operator- (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+template Decimal<TYPE> operator* (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+template Decimal<TYPE> operator/ (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+template Decimal<TYPE> operator- (const Decimal<TYPE> & x);
+FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
+#undef DISPATCH
+
+#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS
+#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE
+}
diff --git a/base/base/Decimal.h b/base/base/Decimal.h
index 66ff623217c..42f9e67c49d 100644
--- a/base/base/Decimal.h
+++ b/base/base/Decimal.h
@@ -2,6 +2,7 @@
 
 #include <base/extended_types.h>
 #include <base/Decimal_fwd.h>
+#include <base/types.h>
 
 #include <base/defines.h>
@@ -10,6 +11,18 @@ namespace DB
 template <typename T> struct Decimal;
 class DateTime64;
 
+#define FOR_EACH_UNDERLYING_DECIMAL_TYPE(M) \
+    M(Int32) \
+    M(Int64) \
+    M(Int128) \
+    M(Int256)
+
+#define FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(M, X) \
+    M(Int32, X) \
+    M(Int64, X) \
+    M(Int128, X) \
+    M(Int256, X)
+
 using Decimal32 = Decimal<Int32>;
 using Decimal64 = Decimal<Int64>;
 using Decimal128 = Decimal<Int128>;
@@ -50,36 +63,73 @@ struct Decimal
         return static_cast<U>(value);
     }
 
-    const Decimal & operator += (const T & x) { value += x; return *this; }
-    const Decimal & operator -= (const T & x) { value -= x; return *this; }
-    const Decimal & operator *= (const T & x) { value *= x; return *this; }
-    const Decimal & operator /= (const T & x) { value /= x; return *this; }
-    const Decimal & operator %= (const T & x) { value %= x; return *this; }
+    const Decimal & operator += (const T & x);
+    const Decimal & operator -= (const T & x);
+    const Decimal & operator *= (const T & x);
+    const Decimal & operator /= (const T & x);
+    const Decimal & operator %= (const T & x);
 
-    template <typename U> const Decimal & operator += (const Decimal<U> & x) { value += x.value; return *this; }
-    template <typename U> const Decimal & operator -= (const Decimal<U> & x) { value -= x.value; return *this; }
-    template <typename U> const Decimal & operator *= (const Decimal<U> & x) { value *= x.value; return *this; }
-    template <typename U> const Decimal & operator /= (const Decimal<U> & x) { value /= x.value; return *this; }
-    template <typename U> const Decimal & operator %= (const Decimal<U> & x) { value %= x.value; return *this; }
+    template <typename U> const Decimal & operator += (const Decimal<U> & x);
+    template <typename U> const Decimal & operator -= (const Decimal<U> & x);
+    template <typename U> const Decimal & operator *= (const Decimal<U> & x);
+    template <typename U> const Decimal & operator /= (const Decimal<U> & x);
+    template <typename U> const Decimal & operator %= (const Decimal<U> & x);
 
     /// This is to avoid UB for sumWithOverflow()
-    void NO_SANITIZE_UNDEFINED addOverflow(const T & x) { value += x; }
+    void NO_SANITIZE_UNDEFINED addOverflow(const T & x);
 
     T value;
 };
 
-template <typename T> inline bool operator< (const Decimal<T> & x, const Decimal<T> & y) { return x.value < y.value; }
-template <typename T> inline bool operator> (const Decimal<T> & x, const Decimal<T> & y) { return x.value > y.value; }
-template <typename T> inline bool operator<= (const Decimal<T> & x, const Decimal<T> & y) { return x.value <= y.value; }
-template <typename T> inline bool operator>= (const Decimal<T> & x, const Decimal<T> & y) { return x.value >= y.value; }
-template <typename T> inline bool operator== (const Decimal<T> & x, const Decimal<T> & y) { return x.value == y.value; }
-template <typename T> inline bool operator!= (const Decimal<T> & x, const Decimal<T> & y) { return x.value != y.value; }
+#define DISPATCH(TYPE) extern template struct Decimal<TYPE>;
+FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
+#undef DISPATCH
 
-template <typename T> inline Decimal<T> operator+ (const Decimal<T> & x, const Decimal<T> & y) { return x.value + y.value; }
-template <typename T> inline Decimal<T> operator- (const Decimal<T> & x, const Decimal<T> & y) { return x.value - y.value; }
-template <typename T> inline Decimal<T> operator* (const Decimal<T> & x, const Decimal<T> & y) { return x.value * y.value; }
-template <typename T> inline Decimal<T> operator/ (const Decimal<T> & x, const Decimal<T> & y) { return x.value / y.value; }
-template <typename T> inline Decimal<T> operator- (const Decimal<T> & x) { return -x.value; }
+#define DISPATCH(TYPE_T, TYPE_U) \
+    extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator += (const Decimal<TYPE_U> & x); \
+    extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator -= (const Decimal<TYPE_U> & x); \
+    extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator *= (const Decimal<TYPE_U> & x); \
+    extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator /= (const Decimal<TYPE_U> & x); \
+    extern template const Decimal<TYPE_T> & Decimal<TYPE_T>::operator %= (const Decimal<TYPE_U> & x);
+#define INVOKE(X) FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS(DISPATCH, X)
+FOR_EACH_UNDERLYING_DECIMAL_TYPE(INVOKE);
+#undef INVOKE
+#undef DISPATCH
+
+template <typename T> bool operator< (const Decimal<T> & x, const Decimal<T> & y);
+template <typename T> bool operator> (const Decimal<T> & x, const Decimal<T> & y);
+template <typename T> bool operator<= (const Decimal<T> & x, const Decimal<T> & y);
+template <typename T> bool operator>= (const Decimal<T> & x, const Decimal<T> & y);
+template <typename T> bool operator== (const Decimal<T> & x, const Decimal<T> & y);
+template <typename T> bool operator!= (const Decimal<T> & x, const Decimal<T> & y);
+
+#define DISPATCH(TYPE) \
+extern template bool operator< (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+extern template bool operator> (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+extern template bool operator<= (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+extern template bool operator>= (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+extern template bool operator== (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+extern template bool operator!= (const Decimal<TYPE> & x, const Decimal<TYPE> & y);
+FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
+#undef DISPATCH
+
+template <typename T> Decimal<T> operator+ (const Decimal<T> & x, const Decimal<T> & y);
+template <typename T> Decimal<T> operator- (const Decimal<T> & x, const Decimal<T> & y);
+template <typename T> Decimal<T> operator* (const Decimal<T> & x, const Decimal<T> & y);
+template <typename T> Decimal<T> operator/ (const Decimal<T> & x, const Decimal<T> & y);
+template <typename T> Decimal<T> operator- (const Decimal<T> & x);
+
+#define DISPATCH(TYPE) \
+extern template Decimal<TYPE> operator+ (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+extern template Decimal<TYPE> operator- (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+extern template Decimal<TYPE> operator* (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+extern template Decimal<TYPE> operator/ (const Decimal<TYPE> & x, const Decimal<TYPE> & y); \
+extern template Decimal<TYPE> operator- (const Decimal<TYPE> & x);
+FOR_EACH_UNDERLYING_DECIMAL_TYPE(DISPATCH)
+#undef DISPATCH
+
+#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE_PASS
+#undef FOR_EACH_UNDERLYING_DECIMAL_TYPE
 
 /// Distinguishable type to allow function resolution/deduction based on value type,
 /// but also relatively easy to convert to/from Decimal64.
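The Decimal.h/Decimal.cpp split above is the standard `extern template` pattern: the header promises that the instantiations for Int32/Int64/Int128/Int256 exist somewhere, and the .cpp file defines them exactly once. As a minimal, self-contained sketch of the same mechanism (a hypothetical `Wrapper` type, not the actual ClickHouse code):

```cpp
// wrapper.h - a minimal sketch of the extern-template pattern (hypothetical type).
#pragma once

template <typename T>
struct Wrapper
{
    T value;
    const Wrapper & operator += (const T & x); /// declared here, defined in wrapper.cpp
};

/// Instantiation declarations: every includer may use Wrapper<int> / Wrapper<long>
/// without instantiating the members itself; the linker finds them in wrapper.cpp.
extern template struct Wrapper<int>;
extern template struct Wrapper<long>;
```

```cpp
// wrapper.cpp - the single translation unit where the members are compiled.
#include "wrapper.h"

template <typename T>
const Wrapper<T> & Wrapper<T>::operator += (const T & x)
{
    value += x;
    return *this;
}

/// Instantiation definitions matching the extern declarations in the header.
template struct Wrapper<int>;
template struct Wrapper<long>;
```

Every file that includes the header can use `Wrapper<int>` without re-instantiating its members, which is where the build-time saving comes from; the trade-off, as the comment in Decimal.cpp notes, is that the out-of-line operators can no longer be inlined at call sites.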
diff --git a/base/base/extended_types.h b/base/base/extended_types.h
index b58df45a97e..796167ab45d 100644
--- a/base/base/extended_types.h
+++ b/base/base/extended_types.h
@@ -64,6 +64,44 @@ template <> struct is_arithmetic<Int256> { static constexpr bool value = true; };
 template <typename T>
 inline constexpr bool is_arithmetic_v = is_arithmetic<T>::value;
 
+#define FOR_EACH_ARITHMETIC_TYPE(M) \
+    M(DataTypeDate) \
+    M(DataTypeDate32) \
+    M(DataTypeDateTime) \
+    M(DataTypeInt8) \
+    M(DataTypeUInt8) \
+    M(DataTypeInt16) \
+    M(DataTypeUInt16) \
+    M(DataTypeInt32) \
+    M(DataTypeUInt32) \
+    M(DataTypeInt64) \
+    M(DataTypeUInt64) \
+    M(DataTypeInt128) \
+    M(DataTypeUInt128) \
+    M(DataTypeInt256) \
+    M(DataTypeUInt256) \
+    M(DataTypeFloat32) \
+    M(DataTypeFloat64)
+
+#define FOR_EACH_ARITHMETIC_TYPE_PASS(M, X) \
+    M(DataTypeDate, X) \
+    M(DataTypeDate32, X) \
+    M(DataTypeDateTime, X) \
+    M(DataTypeInt8, X) \
+    M(DataTypeUInt8, X) \
+    M(DataTypeInt16, X) \
+    M(DataTypeUInt16, X) \
+    M(DataTypeInt32, X) \
+    M(DataTypeUInt32, X) \
+    M(DataTypeInt64, X) \
+    M(DataTypeUInt64, X) \
+    M(DataTypeInt128, X) \
+    M(DataTypeUInt128, X) \
+    M(DataTypeInt256, X) \
+    M(DataTypeUInt256, X) \
+    M(DataTypeFloat32, X) \
+    M(DataTypeFloat64, X)
+
 template <typename T>
 struct make_unsigned // NOLINT(readability-identifier-naming)
 {
diff --git a/docker/images.json b/docker/images.json
index 2bf1efe005f..7439517379b 100644
--- a/docker/images.json
+++ b/docker/images.json
@@ -1,8 +1,12 @@
 {
-    "docker/packager/binary": {
+    "docker/packager/binary-builder": {
         "name": "clickhouse/binary-builder",
         "dependent": []
     },
+    "docker/packager/cctools": {
+        "name": "clickhouse/cctools",
+        "dependent": []
+    },
     "docker/test/compatibility/centos": {
         "name": "clickhouse/test-old-centos",
         "dependent": []
@@ -30,7 +34,6 @@
     "docker/test/util": {
         "name": "clickhouse/test-util",
         "dependent": [
-            "docker/packager/binary",
             "docker/test/base",
             "docker/test/fasttest"
         ]
@@ -67,7 +70,9 @@
     },
     "docker/test/fasttest": {
         "name": "clickhouse/fasttest",
-        "dependent": []
+        "dependent": [
+            "docker/packager/binary-builder"
+        ]
     },
     "docker/test/style": {
         "name": "clickhouse/style-test",
diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary-builder/Dockerfile
similarity index 64%
rename from docker/packager/binary/Dockerfile
rename to docker/packager/binary-builder/Dockerfile
index e20cbe9781c..96c90403187 100644
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary-builder/Dockerfile
@@ -1,43 +1,6 @@
 # docker build -t clickhouse/binary-builder .
 ARG FROM_TAG=latest
 
-FROM clickhouse/test-util:latest AS cctools
-# The cctools are built always from the clickhouse/test-util:latest and cached inline
-# Theoretically, it should improve rebuild speed significantly
-ENV CC=clang-${LLVM_VERSION}
-ENV CXX=clang++-${LLVM_VERSION}
-# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-# DO NOT PUT ANYTHING BEFORE THE NEXT TWO `RUN` DIRECTIVES
-# THE MOST HEAVY OPERATION MUST BE THE FIRST IN THE CACHE
-# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-# libtapi is required to support .tbh format from recent MacOS SDKs
-RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
-    && cd apple-libtapi \
-    && git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 \
-    && INSTALLPREFIX=/cctools ./build.sh \
-    && ./install.sh \
-    && cd ..
 \
-    && rm -rf apple-libtapi
-
-# Build and install tools for cross-linking to Darwin (x86-64)
-# Build and install tools for cross-linking to Darwin (aarch64)
-RUN git clone https://github.com/tpoechtrager/cctools-port.git \
-    && cd cctools-port/cctools \
-    && git checkout 2a3e1c2a6ff54a30f898b70cfb9ba1692a55fad7 \
-    && ./configure --prefix=/cctools --with-libtapi=/cctools \
-        --target=x86_64-apple-darwin \
-    && make install -j$(nproc) \
-    && make clean \
-    && ./configure --prefix=/cctools --with-libtapi=/cctools \
-        --target=aarch64-apple-darwin \
-    && make install -j$(nproc) \
-    && cd ../.. \
-    && rm -rf cctools-port
-
-# !!!!!!!!!!!
-# END COMPILE
-# !!!!!!!!!!!
-
-FROM clickhouse/test-util:$FROM_TAG
+FROM clickhouse/fasttest:$FROM_TAG
 
 ENV CC=clang-${LLVM_VERSION}
 ENV CXX=clang++-${LLVM_VERSION}
@@ -110,7 +73,8 @@ RUN curl -Lo /usr/bin/clang-tidy-cache \
         "https://raw.githubusercontent.com/matus-chochlik/ctcache/$CLANG_TIDY_SHA1/clang-tidy-cache" \
     && chmod +x /usr/bin/clang-tidy-cache
 
-COPY --from=cctools /cctools /cctools
+# If the cctools image is updated, first build it in the CI, then update the tag here in a different commit
+COPY --from=clickhouse/cctools:5a908f73878a /cctools /cctools
 
 RUN mkdir /workdir && chmod 777 /workdir
 WORKDIR /workdir
diff --git a/docker/packager/binary/build.sh b/docker/packager/binary-builder/build.sh
similarity index 100%
rename from docker/packager/binary/build.sh
rename to docker/packager/binary-builder/build.sh
diff --git a/docker/packager/cctools/Dockerfile b/docker/packager/cctools/Dockerfile
new file mode 100644
index 00000000000..1b8c675a5c5
--- /dev/null
+++ b/docker/packager/cctools/Dockerfile
@@ -0,0 +1,31 @@
+# This is a hack to significantly reduce the build time of the clickhouse/binary-builder
+# It's based on the assumption that we don't care about the cctools version so much
+# It does not even depend on the clickhouse/fasttest in the `docker/images.json`
+ARG FROM_TAG=latest
+FROM clickhouse/fasttest:$FROM_TAG
+
+ENV CC=clang-${LLVM_VERSION}
+ENV CXX=clang++-${LLVM_VERSION}
+
+RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
+    && cd apple-libtapi \
+    && git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 \
+    && INSTALLPREFIX=/cctools ./build.sh \
+    && ./install.sh \
+    && cd .. \
+    && rm -rf apple-libtapi
+
+# Build and install tools for cross-linking to Darwin (x86-64)
+# Build and install tools for cross-linking to Darwin (aarch64)
+RUN git clone https://github.com/tpoechtrager/cctools-port.git \
+    && cd cctools-port/cctools \
+    && git checkout 2a3e1c2a6ff54a30f898b70cfb9ba1692a55fad7 \
+    && ./configure --prefix=/cctools --with-libtapi=/cctools \
+        --target=x86_64-apple-darwin \
+    && make install -j$(nproc) \
+    && make clean \
+    && ./configure --prefix=/cctools --with-libtapi=/cctools \
+        --target=aarch64-apple-darwin \
+    && make install -j$(nproc) \
+    && cd ../..
\ + && rm -rf cctools-port diff --git a/docker/packager/packager b/docker/packager/packager index ca0ae8358f3..23fc26bc1a4 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -1,16 +1,16 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import subprocess -import os import argparse import logging +import os +import subprocess import sys from pathlib import Path from typing import List, Optional SCRIPT_PATH = Path(__file__).absolute() -IMAGE_TYPE = "binary" -IMAGE_NAME = f"clickhouse/{IMAGE_TYPE}-builder" +IMAGE_TYPE = "binary-builder" +IMAGE_NAME = f"clickhouse/{IMAGE_TYPE}" class BuildException(Exception): diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index e10555d4d4a..62cdcc3f830 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -6,9 +6,18 @@ FROM clickhouse/test-util:$FROM_TAG RUN apt-get update \ && apt-get install \ brotli \ + clang-${LLVM_VERSION} \ + clang-tidy-${LLVM_VERSION} \ + cmake \ expect \ file \ + libclang-${LLVM_VERSION}-dev \ + libclang-rt-${LLVM_VERSION}-dev \ + lld-${LLVM_VERSION} \ + llvm-${LLVM_VERSION} \ + llvm-${LLVM_VERSION}-dev \ lsof \ + ninja-build \ odbcinst \ psmisc \ python3 \ @@ -26,14 +35,50 @@ RUN apt-get update \ RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 -ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" +# This symlink is required by gcc to find the lld linker +RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld +# for external_symbolizer_path +RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer +# FIXME: workaround for "The imported target "merge-fdata" references the file" error +# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d +RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake -RUN mkdir -p /tmp/clickhouse-odbc-tmp \ - && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ - && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \ - && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \ - && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ - && rm -rf /tmp/clickhouse-odbc-tmp +ARG CCACHE_VERSION=4.6.1 +RUN mkdir /tmp/ccache \ + && cd /tmp/ccache \ + && curl -L \ + -O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz \ + -O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz.asc \ + && gpg --recv-keys --keyserver hkps://keyserver.ubuntu.com 5A939A71A46792CF57866A51996DDA075594ADB8 \ + && gpg --verify ccache-4.6.1.tar.xz.asc \ + && tar xf ccache-$CCACHE_VERSION.tar.xz \ + && cd /tmp/ccache/ccache-$CCACHE_VERSION \ + && cmake -DCMAKE_INSTALL_PREFIX=/usr \ + -DCMAKE_BUILD_TYPE=None \ + -DZSTD_FROM_INTERNET=ON \ + -DREDIS_STORAGE_BACKEND=OFF \ + -Wno-dev \ + -B build \ + -S . \ + && make VERBOSE=1 -C build \ + && make install -C build \ + && cd / \ + && rm -rf /tmp/ccache + +ARG TARGETARCH +ARG SCCACHE_VERSION=v0.7.7 +ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1 +# sccache requires a value for the region. 
So by default we use The Default Region +ENV SCCACHE_REGION=us-east-1 +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && curl -Ls "https://github.com/mozilla/sccache/releases/download/$SCCACHE_VERSION/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl.tar.gz" | \ + tar xz -C /tmp \ + && mv "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl/sccache" /usr/bin \ + && rm "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl" -r # Give suid to gdb to grant it attach permissions # chmod 777 to make the container user independent diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 2834a632631..dc1b35b3a21 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -343,10 +343,9 @@ quit # which is confusing. task_exit_code=$fuzzer_exit_code echo "failure" > status.txt - { rg -ao "Found error:.*" fuzzer.log \ - || rg -ao "Exception:.*" fuzzer.log \ - || echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \ - | tail -1 > description.txt + echo "Achtung!" > description.txt + echo "Fuzzer went wrong with error code: ($fuzzer_exit_code). Its process died somehow when the server stayed alive. The server log probably won't tell you much so try to find information in other files." >>description.txt + { rg -ao "Found error:.*" fuzzer.log || rg -ao "Exception:.*" fuzzer.log; } | tail -1 >>description.txt fi if test -f core.*; then diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 05130044c45..1ea1e52e6fa 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -24,17 +24,18 @@ RUN pip3 install \ deepdiff \ sqlglot -ARG odbc_repo="https://github.com/ClickHouse/clickhouse-odbc.git" +ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz" + +RUN mkdir -p /tmp/clickhouse-odbc-tmp \ + && cd /tmp/clickhouse-odbc-tmp \ + && curl -L ${odbc_driver_url} | tar --strip-components=1 -xz clickhouse-odbc-1.1.6-Linux \ + && mkdir /usr/local/lib64 -p \ + && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib64/ \ + && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \ + && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ + && sed -i 's"=libclickhouseodbc"=/usr/local/lib64/libclickhouseodbc"' /etc/odbcinst.ini \ + && rm -rf /tmp/clickhouse-odbc-tmp -RUN git clone --recursive ${odbc_repo} \ - && mkdir -p /clickhouse-odbc/build \ - && cmake -S /clickhouse-odbc -B /clickhouse-odbc/build \ - && ls /clickhouse-odbc/build/driver \ - && make -j 10 -C /clickhouse-odbc/build \ - && ls /clickhouse-odbc/build/driver \ - && mkdir -p /usr/local/lib64/ && cp /clickhouse-odbc/build/driver/lib*.so /usr/local/lib64/ \ - && odbcinst -i -d -f /clickhouse-odbc/packaging/odbcinst.ini.sample \ - && odbcinst -i -s -l -f /clickhouse-odbc/packaging/odbc.ini.sample ENV TZ=Europe/Amsterdam ENV MAX_RUN_TIME=9000 diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 7f4bad3d4e6..cd8864c6299 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -3,7 +3,7 @@ ARG FROM_TAG=latest FROM clickhouse/test-base:$FROM_TAG -ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" +ARG 
odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz" # golang version 1.13 on Ubuntu 20 is enough for tests RUN apt-get update -y \ @@ -35,7 +35,6 @@ RUN apt-get update -y \ sudo \ tree \ unixodbc \ - wget \ rustc \ cargo \ zstd \ @@ -50,11 +49,14 @@ RUN apt-get update -y \ RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 pyarrow==15.0.0 RUN mkdir -p /tmp/clickhouse-odbc-tmp \ - && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ - && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \ - && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \ - && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ - && rm -rf /tmp/clickhouse-odbc-tmp + && cd /tmp/clickhouse-odbc-tmp \ + && curl -L ${odbc_driver_url} | tar --strip-components=1 -xz clickhouse-odbc-1.1.6-Linux \ + && mkdir /usr/local/lib64 -p \ + && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib64/ \ + && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \ + && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ + && sed -i 's"=libclickhouseodbc"=/usr/local/lib64/libclickhouseodbc"' /etc/odbcinst.ini \ + && rm -rf /tmp/clickhouse-odbc-tmp ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone @@ -70,11 +72,11 @@ ARG TARGETARCH # Download Minio-related binaries RUN arch=${TARGETARCH:-amd64} \ - && wget "https://dl.min.io/server/minio/release/linux-${arch}/archive/minio.RELEASE.${MINIO_SERVER_VERSION}" -O ./minio \ - && wget "https://dl.min.io/client/mc/release/linux-${arch}/archive/mc.RELEASE.${MINIO_CLIENT_VERSION}" -O ./mc \ + && curl -L "https://dl.min.io/server/minio/release/linux-${arch}/archive/minio.RELEASE.${MINIO_SERVER_VERSION}" -o ./minio \ + && curl -L "https://dl.min.io/client/mc/release/linux-${arch}/archive/mc.RELEASE.${MINIO_CLIENT_VERSION}" -o ./mc \ && chmod +x ./mc ./minio -RUN wget --no-verbose 'https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \ +RUN curl -L --no-verbose -O 'https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \ && tar -xvf hadoop-3.3.1.tar.gz \ && rm -rf hadoop-3.3.1.tar.gz diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 122f558bab2..b4ffcfb597c 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -60,5 +60,4 @@ RUN arch=${TARGETARCH:-amd64} \ COPY run.sh / -COPY process_style_check_result.py / CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 396d5801be9..4f2dc9df849 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -41,20 +41,11 @@ RUN apt-get update \ bash \ bsdmainutils \ build-essential \ - clang-${LLVM_VERSION} \ - clang-tidy-${LLVM_VERSION} \ - cmake \ gdb \ git \ gperf \ - libclang-rt-${LLVM_VERSION}-dev \ - lld-${LLVM_VERSION} \ - llvm-${LLVM_VERSION} \ - llvm-${LLVM_VERSION}-dev \ - libclang-${LLVM_VERSION}-dev \ moreutils \ nasm \ - ninja-build \ pigz \ rename \ software-properties-common \ @@ -63,49 +54,4 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -# This symlink is required by gcc to find the lld linker -RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld 
-# for external_symbolizer_path -RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer -# FIXME: workaround for "The imported target "merge-fdata" references the file" error -# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d -RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake - -ARG CCACHE_VERSION=4.6.1 -RUN mkdir /tmp/ccache \ - && cd /tmp/ccache \ - && curl -L \ - -O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz \ - -O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz.asc \ - && gpg --recv-keys --keyserver hkps://keyserver.ubuntu.com 5A939A71A46792CF57866A51996DDA075594ADB8 \ - && gpg --verify ccache-4.6.1.tar.xz.asc \ - && tar xf ccache-$CCACHE_VERSION.tar.xz \ - && cd /tmp/ccache/ccache-$CCACHE_VERSION \ - && cmake -DCMAKE_INSTALL_PREFIX=/usr \ - -DCMAKE_BUILD_TYPE=None \ - -DZSTD_FROM_INTERNET=ON \ - -DREDIS_STORAGE_BACKEND=OFF \ - -Wno-dev \ - -B build \ - -S . \ - && make VERBOSE=1 -C build \ - && make install -C build \ - && cd / \ - && rm -rf /tmp/ccache - -ARG TARGETARCH -ARG SCCACHE_VERSION=v0.5.4 -ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1 -# sccache requires a value for the region. So by default we use The Default Region -ENV SCCACHE_REGION=us-east-1 -RUN arch=${TARGETARCH:-amd64} \ - && case $arch in \ - amd64) rarch=x86_64 ;; \ - arm64) rarch=aarch64 ;; \ - esac \ - && curl -Ls "https://github.com/mozilla/sccache/releases/download/$SCCACHE_VERSION/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl.tar.gz" | \ - tar xz -C /tmp \ - && mv "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl/sccache" /usr/bin \ - && rm "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl" -r - COPY process_functional_tests_result.py / diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 622644a1543..3f4dec9dc93 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3954,6 +3954,7 @@ Possible values: - `none` — Is similar to throw, but distributed DDL query returns no result set. - `null_status_on_timeout` — Returns `NULL` as execution status in some rows of result set instead of throwing `TIMEOUT_EXCEEDED` if query is not finished on the corresponding hosts. - `never_throw` — Do not throw `TIMEOUT_EXCEEDED` and do not rethrow exceptions if query has failed on some hosts. +- `none_only_active` - similar to `none`, but doesn't wait for inactive replicas of the `Replicated` database. Note: with this mode it's impossible to figure out that the query was not executed on some replica and will be executed in background. - `null_status_on_timeout_only_active` — similar to `null_status_on_timeout`, but doesn't wait for inactive replicas of the `Replicated` database - `throw_only_active` — similar to `throw`, but doesn't wait for inactive replicas of the `Replicated` database diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index 437a5f0fff0..93a3fecf3c6 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -201,12 +201,12 @@ Arguments: - `-S`, `--structure` — table structure for input data. - `--input-format` — input format, `TSV` by default. -- `-f`, `--file` — path to data, `stdin` by default. 
+- `-F`, `--file` — path to data, `stdin` by default.
 - `-q`, `--query` — queries to execute with `;` as delimiter. `--query` can be specified multiple times, e.g. `--query "SELECT 1" --query "SELECT 2"`. Cannot be used simultaneously with `--queries-file`.
 - `--queries-file` - file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--queries-file queries1.sql --queries-file queries2.sql`. Cannot be used simultaneously with `--query`.
 - `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
 - `-N`, `--table` — table name where to put output data, `table` by default.
-- `--format`, `--output-format` — output format, `TSV` by default.
+- `-f`, `--format`, `--output-format` — output format, `TSV` by default.
 - `-d`, `--database` — default database, `_local` by default.
 - `--stacktrace` — whether to dump debug output in case of exception.
 - `--echo` — print query before execution.
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 41503abfa2f..ba7695af3fa 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -394,8 +394,7 @@ Result:
 
 ## toYear
 
-Converts a date or date with time to the year number (AD) as `UInt16` value.
-
+Returns the year component (AD) of a date or date with time.
 
 **Syntax**
@@ -431,7 +430,7 @@ Result:
 
 ## toQuarter
 
-Converts a date or date with time to the quarter number (1-4) as `UInt8` value.
+Returns the quarter (1-4) of a date or date with time.
 
 **Syntax**
@@ -465,10 +464,9 @@ Result:
 └──────────────────────────────────────────────┘
 ```
 
-
 ## toMonth
 
-Converts a date or date with time to the month number (1-12) as `UInt8` value.
+Returns the month component (1-12) of a date or date with time.
 
 **Syntax**
@@ -504,7 +502,7 @@ Result:
 
 ## toDayOfYear
 
-Converts a date or date with time to the number of the day of the year (1-366) as `UInt16` value.
+Returns the number of the day within the year (1-366) of a date or date with time.
 
 **Syntax**
@@ -540,7 +538,7 @@ Result:
 
 ## toDayOfMonth
 
-Converts a date or date with time to the number of the day in the month (1-31) as `UInt8` value.
+Returns the number of the day within the month (1-31) of a date or date with time.
 
 **Syntax**
@@ -576,7 +574,7 @@ Result:
 
 ## toDayOfWeek
 
-Converts a date or date with time to the number of the day in the week as `UInt8` value.
+Returns the number of the day within the week of a date or date with time.
 
 The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is omitted, the default mode is 0. The time zone of the date can be specified as the third argument.
@@ -627,7 +625,7 @@ Result:
 
 ## toHour
 
-Converts a date with time to the number of the hour in 24-hour time (0-23) as `UInt8` value.
+Returns the hour component (0-23) of a date with time.
 
 Assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always exactly when it occurs - it depends on the timezone).
@@ -641,7 +639,7 @@ Alias: `HOUR`
 
 **Arguments**
 
-- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
+- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
 
 **Returned value**
@@ -665,7 +663,7 @@ Result:
 
 ## toMinute
 
-Converts a date with time to the number of the minute of the hour (0-59) as `UInt8` value.
+Returns the minute component (0-59) of a date with time.
 
 **Syntax**
@@ -677,7 +675,7 @@ Alias: `MINUTE`
 
 **Arguments**
 
-- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
+- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
 
 **Returned value**
@@ -701,7 +699,7 @@ Result:
 
 ## toSecond
 
-Converts a date with time to the second in the minute (0-59) as `UInt8` value. Leap seconds are not considered.
+Returns the second component (0-59) of a date with time. Leap seconds are not considered.
 
 **Syntax**
@@ -713,7 +711,7 @@ Alias: `SECOND`
 
 **Arguments**
 
-- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
+- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
 
 **Returned value**
@@ -735,6 +733,40 @@ Result:
 └─────────────────────────────────────────────┘
 ```
 
+## toMillisecond
+
+Returns the millisecond component (0-999) of a date with time.
+
+**Syntax**
+
+```sql
+toMillisecond(value)
+```
+
+Alias: `MILLISECOND`
+
+**Arguments**
+
+- `value` - [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
+
+**Returned value**
+
+- The millisecond component (0-999) of the given date with time.
+
+Type: `UInt16`
+
+**Example**
+
+```sql
+SELECT toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3))
+```
+
+Result:
+
+```response
+┌──toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3))─┐
+│                                                        456 │
+└────────────────────────────────────────────────────────────┘
+```
+
 ## toUnixTimestamp
 
 Converts a string, a date or a date with time to the [Unix Timestamp](https://en.wikipedia.org/wiki/Unix_time) in `UInt32` representation.
diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md
index 4e5476210e3..a93db29e82c 100644
--- a/docs/en/sql-reference/statements/grant.md
+++ b/docs/en/sql-reference/statements/grant.md
@@ -202,6 +202,13 @@ Hierarchy of privileges:
     - `S3`
 - [dictGet](#grant-dictget)
 - [displaySecretsInShowAndSelect](#grant-display-secrets)
+- [NAMED COLLECTION ADMIN](#grant-named-collection-admin)
+    - `CREATE NAMED COLLECTION`
+    - `DROP NAMED COLLECTION`
+    - `ALTER NAMED COLLECTION`
+    - `SHOW NAMED COLLECTIONS`
+    - `SHOW NAMED COLLECTIONS SECRETS`
+    - `NAMED COLLECTION`
 
 Examples of how this hierarchy is treated:
@@ -498,6 +505,25 @@ and
 [`format_display_secrets_in_show_and_select` format setting](../../operations/settings/formats#format_display_secrets_in_show_and_select)
 are turned on.
 
+### NAMED COLLECTION ADMIN
+
+Allows operations on a specified named collection. Before version 23.7 it was called `NAMED COLLECTION CONTROL`; in 23.7 `NAMED COLLECTION ADMIN` was added, and `NAMED COLLECTION CONTROL` is preserved as an alias.
+
+- `NAMED COLLECTION ADMIN`. Level: `NAMED_COLLECTION`. Aliases: `NAMED COLLECTION CONTROL`
+    - `CREATE NAMED COLLECTION`. Level: `NAMED_COLLECTION`
+    - `DROP NAMED COLLECTION`. Level: `NAMED_COLLECTION`
+    - `ALTER NAMED COLLECTION`. Level: `NAMED_COLLECTION`
+    - `SHOW NAMED COLLECTIONS`. Level: `NAMED_COLLECTION`. Aliases: `SHOW NAMED COLLECTIONS`
+    - `SHOW NAMED COLLECTIONS SECRETS`. Level: `NAMED_COLLECTION`. Aliases: `SHOW NAMED COLLECTIONS SECRETS`
+    - `NAMED COLLECTION`. Level: `NAMED_COLLECTION`. Aliases: `NAMED COLLECTION USAGE, USE NAMED COLLECTION`
+
+Unlike all other grants (CREATE, DROP, ALTER, SHOW), the `NAMED COLLECTION` grant was added only in 23.7, while all the others were added earlier, in 22.12.
+
+**Examples**
+
+Assuming a named collection is called abc, we grant privilege CREATE NAMED COLLECTION to user john.
+- `GRANT CREATE NAMED COLLECTION ON abc TO john`
+
 ### ALL
 
 Grants all the privileges on a regulated entity to a user account or a role.
diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index 6340c369bff..9b2ded7b6ce 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -5,7 +5,12 @@ sidebar_label: Window Functions
 title: Window Functions
 ---
 
-ClickHouse supports the standard grammar for defining windows and window functions. The following features are currently supported:
+Window functions let you perform calculations across a set of rows that are related to the current row.
+Some of the calculations that you can do are similar to those that can be done with an aggregate function, but a window function doesn't cause rows to be grouped into a single output row - the individual rows are still returned.
+
+## Standard Window Functions
+
+ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported.
 
 | Feature                                                                              | Support or workaround                                                                                                                                                          |
 |--------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -25,6 +30,8 @@ ClickHouse supports the standard grammar for defining windows and window functio
 
 ## ClickHouse-specific Window Functions
 
+There is also the following window function that is specific to ClickHouse:
+
 ### nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL X UNITS])
 
 Finds non-negative derivative for given `metric_column` by `timestamp_column`.
@@ -33,40 +40,6 @@ The computed value is the following for each row:
 - `0` for 1st row,
 - ${metric_i - metric_{i-1} \over timestamp_i - timestamp_{i-1}} * interval$ for $i_th$ row.
 
-## References
-
-### GitHub Issues
-
-The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097).
-
-All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
- -### Tests - -These tests contain the examples of the currently supported grammar: - -https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml - -https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql - -### Postgres Docs - -https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW - -https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS - -https://www.postgresql.org/docs/devel/functions-window.html - -https://www.postgresql.org/docs/devel/tutorial-window.html - -### MySQL Docs - -https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html - -https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html - -https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html - ## Syntax ```text @@ -80,20 +53,7 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column] - `PARTITION BY` - defines how to break a resultset into groups. - `ORDER BY` - defines how to order rows inside the group during calculation aggregate_function. - `ROWS or RANGE` - defines bounds of a frame, aggregate_function is calculated within a frame. -- `WINDOW` - allows to reuse a window definition with multiple expressions. - -### Functions - -These functions can be used only as a window function. - -- `row_number()` - Number the current row within its partition starting from 1. -- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. -- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. -- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. -- `rank()` - Rank the current row within its partition with gaps. -- `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. -- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +- `WINDOW` - allows multiple expressions to use the same window definition. ```text PARTITION @@ -112,8 +72,23 @@ These functions can be used only as a window function. └─────────────────┘ <--- UNBOUNDED FOLLOWING (END of the PARTITION) ``` +### Functions + +These functions can be used only as a window function. + +- `row_number()` - Number the current row within its partition starting from 1. +- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. +- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. +- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. +- `rank()` - Rank the current row within its partition with gaps. +- `dense_rank()` - Rank the current row within its partition without gaps. +- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. +- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. + ## Examples +Let's have a look at some examples of how window functions can be used. 
+ ```sql CREATE TABLE wf_partition ( @@ -589,6 +564,41 @@ ORDER BY └──────────────┴─────────────────────┴───────┴─────────────────────────┘ ``` +## References + +### GitHub Issues + +The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097). + +All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag. + +### Tests + +These tests contain the examples of the currently supported grammar: + +https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml + +https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql + +### Postgres Docs + +https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW + +https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS + +https://www.postgresql.org/docs/devel/functions-window.html + +https://www.postgresql.org/docs/devel/tutorial-window.html + +### MySQL Docs + +https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html + +https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html + +https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html + + ## Related Content - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index 1874970ac95..c3b4194ed44 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -649,11 +649,22 @@ log_query_threads=1 ## max_query_size {#settings-max_query_size} -查询的最大部分,可以被带到RAM用于使用SQL解析器进行解析。 -插入查询还包含由单独的流解析器(消耗O(1)RAM)处理的插入数据,这些数据不包含在此限制中。 +SQL 解析器解析的查询字符串的最大字节数。 INSERT 查询的 VALUES 子句中的数据由单独的流解析器(消耗 O(1) RAM)处理,并且不受此限制的影响。 默认值:256KiB。 + +## max_parser_depth {#max_parser_depth} + +限制递归下降解析器中的最大递归深度。允许控制堆栈大小。 + +可能的值: + +- 正整数。 +- 0 — 递归深度不受限制。 + +默认值:1000。 + ## interactive_delay {#interactive-delay} 以微秒为单位的间隔,用于检查请求执行是否已被取消并发送进度。 @@ -1064,6 +1075,28 @@ ClickHouse生成异常 默认值:0。 +## optimize_functions_to_subcolumns {#optimize_functions_to_subcolumns} + +启用或禁用通过将某些函数转换为读取子列的优化。这减少了要读取的数据量。 + +这些函数可以转化为: + +- [length](../../sql-reference/functions/array-functions.md/#array_functions-length) 读取 [size0](../../sql-reference/data-types/array.md/#array-size)子列。 +- [empty](../../sql-reference/functions/array-functions.md/#empty函数) 读取 [size0](../../sql-reference/data-types/array.md/#array-size)子列。 +- [notEmpty](../../sql-reference/functions/array-functions.md/#notempty函数) 读取 [size0](../../sql-reference/data-types/array.md/#array-size)子列。 +- [isNull](../../sql-reference/operators/index.md#operator-is-null) 读取 [null](../../sql-reference/data-types/nullable. md/#finding-null) 子列。 +- [isNotNull](../../sql-reference/operators/index.md#is-not-null) 读取 [null](../../sql-reference/data-types/nullable. 
md/#finding-null) 子列。 +- [count](../../sql-reference/aggregate-functions/reference/count.md) 读取 [null](../../sql-reference/data-types/nullable.md/#finding-null) 子列。 +- [mapKeys](../../sql-reference/functions/tuple-map-functions.mdx/#mapkeys) 读取 [keys](../../sql-reference/data-types/map.md/#map-subcolumns) 子列。 +- [mapValues](../../sql-reference/functions/tuple-map-functions.mdx/#mapvalues) 读取 [values](../../sql-reference/data-types/map.md/#map-subcolumns) 子列。 + +可能的值: + +- 0 — 禁用优化。 +- 1 — 优化已启用。 + +默认值:`0`。 + ## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life} - 类型:秒 diff --git a/docs/zh/sql-reference/data-types/array.md b/docs/zh/sql-reference/data-types/array.md index 46c40b889ad..da4cea65101 100644 --- a/docs/zh/sql-reference/data-types/array.md +++ b/docs/zh/sql-reference/data-types/array.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/array --- -# 阵列(T) {#data-type-array} +# 数组(T) {#data-type-array} 由 `T` 类型元素组成的数组。 @@ -66,3 +66,27 @@ SELECT array(1, 'a') Received exception from server (version 1.1.54388): Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not. ``` + +## 数组大小 {#array-size} + +可以使用 `size0` 子列找到数组的大小,而无需读取整个列。对于多维数组,您可以使用 `sizeN-1`,其中 `N` 是所需的维度。 + +**例子** + +SQL查询: + +```sql +CREATE TABLE t_arr (`arr` Array(Array(Array(UInt32)))) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_arr VALUES ([[[12, 13, 0, 1],[12]]]); + +SELECT arr.size0, arr.size1, arr.size2 FROM t_arr; +``` + +结果: + +``` text +┌─arr.size0─┬─arr.size1─┬─arr.size2─┐ +│ 1 │ [2] │ [[4,1]] │ +└───────────┴───────────┴───────────┘ +``` diff --git a/docs/zh/sql-reference/data-types/nullable.md b/docs/zh/sql-reference/data-types/nullable.md index 94311f8298a..b1cc9dd7bae 100644 --- a/docs/zh/sql-reference/data-types/nullable.md +++ b/docs/zh/sql-reference/data-types/nullable.md @@ -20,6 +20,33 @@ slug: /zh/sql-reference/data-types/nullable 掩码文件中的条目允许ClickHouse区分每个表行的对应数据类型的«NULL»和默认值由于有额外的文件,«Nullable»列比普通列消耗更多的存储空间 +## null子列 {#finding-null} + +通过使用 `null` 子列可以在列中查找 `NULL` 值,而无需读取整个列。如果对应的值为 `NULL`,则返回 `1`,否则返回 `0`。 + +**示例** + +SQL查询: + +``` sql +CREATE TABLE nullable (`n` Nullable(UInt32)) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO nullable VALUES (1) (NULL) (2) (NULL); + +SELECT n.null FROM nullable; +``` + +结果: + +``` text +┌─n.null─┐ +│ 0 │ +│ 1 │ +│ 0 │ +│ 1 │ +└────────┘ +``` + ## 用法示例 {#yong-fa-shi-li} ``` sql diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index 0cfff7e3f6c..dcd199c6b38 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -259,7 +259,7 @@ ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std res.is_remote = 1; for (const auto & replica : replicas) { - if (isLocalAddress(DNSResolver::instance().resolveHost(replica.host_name))) + if (isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(replica.host_name).front())) { res.is_remote = 0; break; diff --git a/rust/Cargo.lock b/rust/Cargo.lock index dbbe184228c..a242a8243b5 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -6,6 +6,7 @@ version = 3 name = "_ch_rust_prql" version = "0.1.0" dependencies = [ + "anstream", "prqlc", "serde_json", ] @@ -698,9 +699,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.7.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", ] @@ -751,9 +752,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.1" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "memchr", ] diff --git a/rust/prql/Cargo.toml b/rust/prql/Cargo.toml index e70a49658e3..514b5176d48 100644 --- a/rust/prql/Cargo.toml +++ b/rust/prql/Cargo.toml @@ -4,6 +4,7 @@ name = "_ch_rust_prql" version = "0.1.0" [dependencies] +anstream = {version = "0.6.12"} prqlc = {version = "0.11.3", default-features = false} serde_json = "1.0" diff --git a/rust/prql/src/lib.rs b/rust/prql/src/lib.rs index 9e4b0ae194a..2e5b2061fcb 100644 --- a/rust/prql/src/lib.rs +++ b/rust/prql/src/lib.rs @@ -39,6 +39,11 @@ pub unsafe extern "C" fn prql_to_sql_impl( }; if let Ok(sql_str) = prqlc::compile(&query_str, &opts) { + // NOTE: Over at PRQL we're considering to un-deprecate & re-enable the + // `color: false` option. If that happens, we can remove the `strip_str` + // here, which strips color codes from the output. + use anstream::adapter::strip_str; + let sql_str = strip_str(&sql_str).to_string(); set_output(sql_str, out, out_size); 0 } else { @@ -54,17 +59,50 @@ pub unsafe extern "C" fn prql_to_sql( out: *mut *mut u8, out_size: *mut u64, ) -> i64 { - let ret = panic::catch_unwind(|| { - return prql_to_sql_impl(query, size, out, out_size); - }); - return match ret { - // NOTE: using cxxbridge we can return proper Result<> type. - Err(_err) => 1, - Ok(res) => res, - } + // NOTE: using cxxbridge we can return proper Result<> type. + panic::catch_unwind(|| prql_to_sql_impl(query, size, out, out_size)).unwrap_or_else(|_| { + set_output("prqlc panicked".to_string(), out, out_size); + 1 + }) } #[no_mangle] pub unsafe extern "C" fn prql_free_pointer(ptr_to_free: *mut u8) { std::mem::drop(CString::from_raw(ptr_to_free as *mut c_char)); } + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::{CStr, CString}; + + /// A test helper to offer a rust interface to the C bindings + fn run_compile(query: &str) -> (String, i64) { + let query_cstr = CString::new(query).unwrap(); + let query_ptr = query_cstr.as_ptr() as *const u8; + let query_size = query_cstr.to_bytes_with_nul().len() as u64 - 1; // Excluding the null terminator + + let mut out: *mut u8 = std::ptr::null_mut(); + let mut out_size = 0_u64; + + unsafe { + let success = prql_to_sql(query_ptr, query_size, &mut out, &mut out_size); + let output = CStr::from_ptr(out as *const i8) + .to_str() + .unwrap() + .to_string(); + prql_free_pointer(out); + (output, success) + } + } + + #[test] + fn test_prql_to_sql() { + assert!(run_compile("from x").0.contains("SELECT")); + assert!(run_compile("asdf").1 == 1); + // In prqlc 0.11.3, this is a panic, so that allows us to test that the + // panic is caught. When we upgrade prqlc, it won't be a panic any + // longer. 
+ assert!(run_compile("x -> y").1 == 1); + } +} diff --git a/src/Access/Common/AllowedClientHosts.cpp b/src/Access/Common/AllowedClientHosts.cpp index c677465a7a1..bee0cdd7264 100644 --- a/src/Access/Common/AllowedClientHosts.cpp +++ b/src/Access/Common/AllowedClientHosts.cpp @@ -55,7 +55,7 @@ namespace { IPAddress addr_v6 = toIPv6(address); - auto host_addresses = DNSResolver::instance().resolveHostAll(host); + auto host_addresses = DNSResolver::instance().resolveHostAllInOriginOrder(host); for (const auto & addr : host_addresses) { diff --git a/src/Access/Common/QuotaDefs.cpp b/src/Access/Common/QuotaDefs.cpp index 0e9a4d5a365..04c16a562d2 100644 --- a/src/Access/Common/QuotaDefs.cpp +++ b/src/Access/Common/QuotaDefs.cpp @@ -49,71 +49,135 @@ String QuotaTypeInfo::valueToStringWithName(QuotaValue value) const const QuotaTypeInfo & QuotaTypeInfo::get(QuotaType type) { - static constexpr auto make_info = [](const char * raw_name_, UInt64 output_denominator_) + static constexpr auto make_info = [](const char * raw_name_, String current_usage_description_, String max_allowed_usage_description_, UInt64 output_denominator_) { String init_name = raw_name_; boost::to_lower(init_name); String init_keyword = raw_name_; boost::replace_all(init_keyword, "_", " "); bool init_output_as_float = (output_denominator_ != 1); - return QuotaTypeInfo{raw_name_, std::move(init_name), std::move(init_keyword), init_output_as_float, output_denominator_}; + return QuotaTypeInfo + { + .raw_name = raw_name_, + .name = std::move(init_name), + .keyword = std::move(init_keyword), + .current_usage_description = std::move(current_usage_description_), + .max_allowed_usage_description = std::move(max_allowed_usage_description_), + .output_as_float = init_output_as_float, + .output_denominator = output_denominator_ + }; }; switch (type) { case QuotaType::QUERIES: { - static const auto info = make_info("QUERIES", 1); + static const auto info = make_info( + "QUERIES", + "The current number of executed queries.", + "The maximum number of queries of all types allowed to be executed.", + 1 + ); return info; } case QuotaType::QUERY_SELECTS: { - static const auto info = make_info("QUERY_SELECTS", 1); + static const auto info = make_info( + "QUERY_SELECTS", + "The current number of executed SELECT queries.", + "The maximum number of SELECT queries allowed to be executed.", + 1 + ); return info; } case QuotaType::QUERY_INSERTS: { - static const auto info = make_info("QUERY_INSERTS", 1); + static const auto info = make_info( + "QUERY_INSERTS", + "The current number of executed INSERT queries.", + "The maximum number of INSERT queries allowed to be executed.", + 1 + ); return info; } case QuotaType::ERRORS: { - static const auto info = make_info("ERRORS", 1); + static const auto info = make_info( + "ERRORS", + "The current number of queries that resulted in an error.", + "The maximum number of queries that resulted in an error allowed within the specified period of time.", + 1 + ); return info; } case QuotaType::RESULT_ROWS: { - static const auto info = make_info("RESULT_ROWS", 1); + static const auto info = make_info( + "RESULT_ROWS", + "The current total number of rows in the result set of all queries within the current period of time.", + "The maximum total number of rows in the result set of all queries allowed within the specified period of time.", + 1 + ); return info; } case QuotaType::RESULT_BYTES: { - static const auto info = make_info("RESULT_BYTES", 1); + static const auto info = make_info( + "RESULT_BYTES", +
"The current total number of bytes in the result set of all queries within the current period of time.", + "The maximum total number of bytes in the result set of all queries allowed within the specified period of time.", + 1 + ); return info; } case QuotaType::READ_ROWS: { - static const auto info = make_info("READ_ROWS", 1); + static const auto info = make_info( + "READ_ROWS", + "The current total number of rows read during execution of all queries within the current period of time.", + "The maximum number of rows to read during execution of all queries allowed within the specified period of time.", + 1 + ); return info; } case QuotaType::READ_BYTES: { - static const auto info = make_info("READ_BYTES", 1); + static const auto info = make_info( + "READ_BYTES", + "The current total number of bytes read during execution of all queries within the current period of time.", + "The maximum number of bytes to read during execution of all queries allowed within the specified period of time.", + 1 + ); return info; } case QuotaType::EXECUTION_TIME: { - static const auto info = make_info("EXECUTION_TIME", 1000000000 /* execution_time is stored in nanoseconds */); + static const auto info = make_info( + "EXECUTION_TIME", + "The current total amount of time (in nanoseconds) spent executing queries within the current period of time.", + "The maximum amount of time (in nanoseconds) allowed for all queries to execute within the specified period of time.", + 1000000000 /* execution_time is stored in nanoseconds */ + ); return info; } case QuotaType::WRITTEN_BYTES: { - static const auto info = make_info("WRITTEN_BYTES", 1); + static const auto info = make_info( + "WRITTEN_BYTES", + "The current total number of bytes written during execution of all queries within the current period of time.", + "The maximum number of bytes to be written during execution of all queries allowed within the specified period of time.", + 1 + ); return info; } case QuotaType::FAILED_SEQUENTIAL_AUTHENTICATIONS: { - static const auto info = make_info("FAILED_SEQUENTIAL_AUTHENTICATIONS", 1); + static const auto info = make_info( + "FAILED_SEQUENTIAL_AUTHENTICATIONS", + "The current number of consecutive authentication failures within the current period of time.", + "The maximum number of consecutive authentication failures allowed within the specified period of time.", + 1 + ); return info; } case QuotaType::MAX: break; diff --git a/src/Access/Common/QuotaDefs.h b/src/Access/Common/QuotaDefs.h index 4f849a72b43..6618f01c8f9 100644 --- a/src/Access/Common/QuotaDefs.h +++ b/src/Access/Common/QuotaDefs.h @@ -33,6 +33,8 @@ struct QuotaTypeInfo { const char * const raw_name = ""; const String name; /// Lowercased with underscores, e.g. "result_rows". const String keyword; /// Uppercased with spaces, e.g. "RESULT ROWS".
+ const String current_usage_description; + const String max_allowed_usage_description; const bool output_as_float = false; const UInt64 output_denominator = 1; String valueToString(QuotaValue value) const; diff --git a/src/Access/Common/RowPolicyDefs.cpp b/src/Access/Common/RowPolicyDefs.cpp index b1f882fe971..a9509b6dd76 100644 --- a/src/Access/Common/RowPolicyDefs.cpp +++ b/src/Access/Common/RowPolicyDefs.cpp @@ -33,7 +33,7 @@ String toString(RowPolicyFilterType type) const RowPolicyFilterTypeInfo & RowPolicyFilterTypeInfo::get(RowPolicyFilterType type_) { - static constexpr auto make_info = [](const char * raw_name_) + static constexpr auto make_info = [](const char * raw_name_, const String & comment_) { String init_name = raw_name_; boost::to_lower(init_name); @@ -41,14 +41,17 @@ const RowPolicyFilterTypeInfo & RowPolicyFilterTypeInfo::get(RowPolicyFilterType String init_command = init_name.substr(0, underscore_pos); boost::to_upper(init_command); bool init_is_check = (std::string_view{init_name}.substr(underscore_pos + 1) == "check"); - return RowPolicyFilterTypeInfo{raw_name_, std::move(init_name), std::move(init_command), init_is_check}; + return RowPolicyFilterTypeInfo{raw_name_, std::move(init_name), std::move(init_command), comment_, init_is_check}; }; switch (type_) { case RowPolicyFilterType::SELECT_FILTER: { - static const auto info = make_info("SELECT_FILTER"); + static const auto info = make_info( + "SELECT_FILTER", + "Expression which is used for filtering in SELECT queries." + ); return info; } #if 0 /// Row-level security for INSERT, UPDATE, DELETE is not implemented yet. diff --git a/src/Access/Common/RowPolicyDefs.h b/src/Access/Common/RowPolicyDefs.h index 7ffc99e1272..bf2f632e98b 100644 --- a/src/Access/Common/RowPolicyDefs.h +++ b/src/Access/Common/RowPolicyDefs.h @@ -52,6 +52,7 @@ struct RowPolicyFilterTypeInfo const char * const raw_name; const String name; /// Lowercased with underscores, e.g. "select_filter". const String command; /// Uppercased without last word, e.g. "SELECT". + const String description; const bool is_check; /// E.g. false for SELECT_FILTER. 
static const RowPolicyFilterTypeInfo & get(RowPolicyFilterType type); }; diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index 69bed3dbe90..e26500a9886 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include #include @@ -38,52 +40,9 @@ ConstantNode::ConstantNode(Field value_) : ConstantNode(value_, applyVisitor(FieldToDataType(), value_)) {} -void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const -{ - buffer << std::string(indent, ' ') << "CONSTANT id: " << format_state.getNodeId(this); - - if (hasAlias()) - buffer << ", alias: " << getAlias(); - - buffer << ", constant_value: " << constant_value->getValue().dump(); - buffer << ", constant_value_type: " << constant_value->getType()->getName(); - - if (getSourceExpression()) - { - buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION" << '\n'; - getSourceExpression()->dumpTreeImpl(buffer, format_state, indent + 4); - } -} - -bool ConstantNode::isEqualImpl(const IQueryTreeNode & rhs) const -{ - const auto & rhs_typed = assert_cast(rhs); - return *constant_value == *rhs_typed.constant_value && value_string == rhs_typed.value_string; -} - -void ConstantNode::updateTreeHashImpl(HashState & hash_state) const -{ - auto type_name = constant_value->getType()->getName(); - hash_state.update(type_name.size()); - hash_state.update(type_name); - - hash_state.update(value_string.size()); - hash_state.update(value_string); -} - -QueryTreeNodePtr ConstantNode::cloneImpl() const -{ - return std::make_shared(constant_value, source_expression); -} - -ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const +bool ConstantNode::requiresCastCall() const { const auto & constant_value_literal = constant_value->getValue(); - auto constant_value_ast = std::make_shared(constant_value_literal); - - if (!options.add_cast_for_constants) - return constant_value_ast; - bool need_to_add_cast_function = false; auto constant_value_literal_type = constant_value_literal.getType(); WhichDataType constant_value_type(constant_value->getType()); @@ -131,7 +90,72 @@ ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const // Add cast if constant was created as a result of constant folding. // Constant folding may lead to type transformation and literal on shard // may have a different type. 
- if (need_to_add_cast_function || source_expression != nullptr) + return need_to_add_cast_function || source_expression != nullptr; +} + +bool ConstantNode::receivedFromInitiatorServer() const +{ + if (!hasSourceExpression()) + return false; + + auto * cast_function = getSourceExpression()->as(); + if (!cast_function || cast_function->getFunctionName() != "_CAST") + return false; + return true; +} + +void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "CONSTANT id: " << format_state.getNodeId(this); + + if (hasAlias()) + buffer << ", alias: " << getAlias(); + + buffer << ", constant_value: "; + if (mask_id) + buffer << "[HIDDEN id: " << mask_id << "]"; + else + buffer << constant_value->getValue().dump(); + + buffer << ", constant_value_type: " << constant_value->getType()->getName(); + + if (!mask_id && getSourceExpression()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION" << '\n'; + getSourceExpression()->dumpTreeImpl(buffer, format_state, indent + 4); + } +} + +bool ConstantNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + return *constant_value == *rhs_typed.constant_value && value_string == rhs_typed.value_string; +} + +void ConstantNode::updateTreeHashImpl(HashState & hash_state) const +{ + auto type_name = constant_value->getType()->getName(); + hash_state.update(type_name.size()); + hash_state.update(type_name); + + hash_state.update(value_string.size()); + hash_state.update(value_string); +} + +QueryTreeNodePtr ConstantNode::cloneImpl() const +{ + return std::make_shared(constant_value, source_expression); +} + +ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const +{ + const auto & constant_value_literal = constant_value->getValue(); + auto constant_value_ast = std::make_shared(constant_value_literal); + + if (!options.add_cast_for_constants) + return constant_value_ast; + + if (requiresCastCall()) { auto constant_type_name_ast = std::make_shared(constant_value->getType()->getName()); return makeASTFunction("_CAST", std::move(constant_value_ast), std::move(constant_type_name_ast)); diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h index 51c98a4a3b3..b065853e315 100644 --- a/src/Analyzer/ConstantNode.h +++ b/src/Analyzer/ConstantNode.h @@ -75,6 +75,17 @@ public: return constant_value->getType(); } + /// Check if conversion to AST requires wrapping with _CAST function. + bool requiresCastCall() const; + + /// Check if constant is a result of _CAST function constant folding. + bool receivedFromInitiatorServer() const; + + void setMaskId(size_t id) + { + mask_id = id; + } + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: @@ -90,6 +101,7 @@ private: ConstantValuePtr constant_value; String value_string; QueryTreeNodePtr source_expression; + size_t mask_id = 0; static constexpr size_t children_size = 0; }; diff --git a/src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h b/src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h new file mode 100644 index 00000000000..439ddffe5e5 --- /dev/null +++ b/src/Analyzer/FunctionSecretArgumentsFinderTreeNode.h @@ -0,0 +1,372 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + + +/// Finds arguments of a specified function which should not be displayed for most users for security reasons. 
+/// That involves passwords and secret keys. +class FunctionSecretArgumentsFinderTreeNode +{ +public: + explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_) : function(function_), arguments(function.getArguments()) + { + if (arguments.getNodes().empty()) + return; + + findFunctionSecretArguments(); + } + + struct Result + { + /// Result constructed by default means no arguments will be hidden. + size_t start = static_cast(-1); + size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`). + /// In all known cases secret arguments are consecutive + bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments. + /// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))` + std::vector nested_maps; + + bool hasSecrets() const + { + return count != 0 || !nested_maps.empty(); + } + }; + + FunctionSecretArgumentsFinder::Result getResult() const { return result; } + +private: + const FunctionNode & function; + const ListNode & arguments; + FunctionSecretArgumentsFinder::Result result; + + void markSecretArgument(size_t index, bool argument_is_named = false) + { + if (index >= arguments.getNodes().size()) + return; + if (!result.count) + { + result.start = index; + result.are_named = argument_is_named; + } + chassert(index >= result.start); /// We always check arguments consecutively + result.count = index + 1 - result.start; + if (!argument_is_named) + result.are_named = false; + } + + void findFunctionSecretArguments() + { + const auto & name = function.getFunctionName(); + + if ((name == "mysql") || (name == "postgresql") || (name == "mongodb")) + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + /// postgresql('host:port', 'database', 'table', 'user', 'password', ...) + /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...) + findMySQLFunctionSecretArguments(); + } + else if ((name == "s3") || (name == "cosn") || (name == "oss") || + (name == "deltaLake") || (name == "hudi") || (name == "iceberg")) + { + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) + findS3FunctionSecretArguments(/* is_cluster_function= */ false); + } + else if (name == "s3Cluster") + { + /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) + findS3FunctionSecretArguments(/* is_cluster_function= */ true); + } + else if ((name == "remote") || (name == "remoteSecure")) + { + /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) + findRemoteFunctionSecretArguments(); + } + else if ((name == "encrypt") || (name == "decrypt") || + (name == "aes_encrypt_mysql") || (name == "aes_decrypt_mysql") || + (name == "tryDecrypt")) + { + /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) + findEncryptionFunctionSecretArguments(); + } + else if (name == "url") + { + findURLSecretArguments(); + } + } + + void findMySQLFunctionSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// mysql(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + } + else + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + markSecretArgument(4); + } + } + + /// Returns the number of arguments excluding "headers" and "extra_credentials" (which should + /// always be at the end). Marks "headers" as secret, if found. 
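+ /// ("headers" is recorded in result.nested_maps, so the values inside the headers(...) call are hidden, e.g. headers('foo' = '[HIDDEN]').)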
+ size_t excludeS3OrURLNestedMaps() + { + const auto & nodes = arguments.getNodes(); + size_t count = nodes.size(); + while (count > 0) + { + const FunctionNode * f = nodes.at(count - 1)->as(); + if (!f) + break; + if (f->getFunctionName() == "headers") + result.nested_maps.push_back(f->getFunctionName()); + else if (f->getFunctionName() != "extra_credentials") + break; + count -= 1; + } + return count; + } + + void findS3FunctionSecretArguments(bool is_cluster_function) + { + /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument. + size_t url_arg_idx = is_cluster_function ? 1 : 0; + + if (!is_cluster_function && isNamedCollectionName(0)) + { + /// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...) + findSecretNamedArgument("secret_access_key", 1); + return; + } + + /// We should check other arguments first because we don't need to do any replacement in case of + /// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + /// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + size_t count = excludeS3OrURLNestedMaps(); + if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4)) + { + String second_arg; + if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg)) + { + if (boost::iequals(second_arg, "NOSIGN")) + return; /// The argument after 'url' is "NOSIGN". + + if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + return; /// The argument after 'url' is a format: s3('url', 'format', ...) + } + } + + /// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) + /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + if (url_arg_idx + 2 < count) + markSecretArgument(url_arg_idx + 2); + } + + void findURLSecretArguments() + { + if (!isNamedCollectionName(0)) + excludeS3OrURLNestedMaps(); + } + + bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const + { + if (arg_idx >= arguments.getNodes().size()) + return false; + + return tryGetStringFromArgument(arguments.getNodes()[arg_idx], res, allow_identifier); + } + + static bool tryGetStringFromArgument(const QueryTreeNodePtr argument, String * res, bool allow_identifier = true) + { + if (const auto * literal = argument->as()) + { + if (literal->getValue().getType() != Field::Types::String) + return false; + if (res) + *res = literal->getValue().safeGet(); + return true; + } + + if (allow_identifier) + { + if (const auto * id = argument->as()) + { + if (res) + *res = id->getIdentifier().getFullName(); + return true; + } + } + + return false; + } + + void findRemoteFunctionSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// remote(named_collection, ..., password = 'password', ...) 
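+ /// Only the named argument `password = ...` is marked as secret; the collection name and the other arguments stay visible.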
+ findSecretNamedArgument("password", 1); + return; + } + + /// We're going to replace 'password' with '[HIDDEN]' for the following signatures: + /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key]) + /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key]) + /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key]) + + /// But we should check the number of arguments first because we don't need to do any replacements in case of + /// remote('addresses_expr', db.table) + if (arguments.getNodes().size() < 3) + return; + + size_t arg_num = 1; + + /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'. + const auto * table_function = arguments.getNodes()[arg_num]->as(); + if (table_function && KnownTableFunctionNames::instance().exists(table_function->getFunctionName())) + { + ++arg_num; + } + else + { + std::optional database; + std::optional qualified_table_name; + if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name)) + { + /// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'. + /// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user' + /// before the argument 'password'. So it's safer to wipe two arguments just in case. + /// The last argument can also be a `sharding_key`, so we need to check that argument is a literal string + /// before wiping it (because the `password` argument is always a literal string). + if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false)) + { + /// Wipe either `password` or `user`. + markSecretArgument(arg_num + 2); + } + if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false)) + { + /// Wipe either `password` or `sharding_key`. + markSecretArgument(arg_num + 3); + } + return; + } + + /// Skip the current argument (which is either a database name or a qualified table name). + ++arg_num; + if (database) + { + /// Skip the 'table' argument if the previous argument was a database name. + ++arg_num; + } + } + + /// Skip username. + ++arg_num; + + /// Do our replacement: + /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...) + /// The last argument can also be a `sharding_key`, so we need to check that argument is a literal string + /// before wiping it (because the `password` argument is always a literal string). + bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false); + if (can_be_password) + markSecretArgument(arg_num); + } + + /// Tries to get either a database name or a qualified table name from an argument. + /// Empty string is also allowed (it means the default database). + /// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
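+ /// E.g. "" yields the default database, "db" yields a database name, and "db.table" yields a qualified table name.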
bool tryGetDatabaseNameOrQualifiedTableName( + size_t arg_idx, + std::optional & res_database, + std::optional & res_qualified_table_name) const + { + res_database.reset(); + res_qualified_table_name.reset(); + + String str; + if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true)) + return false; + + if (str.empty()) + { + res_database = ""; + return true; + } + + auto qualified_table_name = QualifiedTableName::tryParseFromString(str); + if (!qualified_table_name) + return false; + + if (qualified_table_name->database.empty()) + res_database = std::move(qualified_table_name->table); + else + res_qualified_table_name = std::move(qualified_table_name); + return true; + } + + void findEncryptionFunctionSecretArguments() + { + if (arguments.getNodes().empty()) + return; + + /// We replace all arguments after 'mode' with '[HIDDEN]': + /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]') + result.start = 1; + result.count = arguments.getNodes().size() - 1; + } + + + /// Returns whether the specified argument can be the name of a named collection. + bool isNamedCollectionName(size_t arg_idx) const + { + if (arguments.getNodes().size() <= arg_idx) + return false; + + const auto * identifier = arguments.getNodes()[arg_idx]->as(); + return identifier != nullptr; + } + + /// Looks for a secret argument with the specified name among arguments of the form `key = value`. + void findSecretNamedArgument(const std::string_view & key, size_t start = 0) + { + for (size_t i = start; i < arguments.getNodes().size(); ++i) + { + const auto & argument = arguments.getNodes()[i]; + const auto * equals_func = argument->as(); + if (!equals_func || (equals_func->getFunctionName() != "equals")) + continue; + + const auto * expr_list = equals_func->getArguments().as(); + if (!expr_list) + continue; + + const auto & equal_args = expr_list->getNodes(); + if (equal_args.size() != 2) + continue; + + String found_key; + if (!tryGetStringFromArgument(equal_args[0], &found_key)) + continue; + + if (found_key == key) + markSecretArgument(i, /* argument_is_named= */ true); + } + } +}; + +} diff --git a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp index cc334cde9c7..0c37749c706 100644 --- a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp +++ b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp @@ -94,7 +94,8 @@ public: if (!func_node || func_node->getArguments().getNodes().size() != 1) return; - const auto * column_id = func_node->getArguments().getNodes()[0]->as(); + const auto & argument_node = func_node->getArguments().getNodes()[0]; + const auto * column_id = argument_node->as(); if (!column_id) return; @@ -119,7 +120,7 @@ public: if (!preimage_range) return; - const auto new_node = generateOptimizedDateFilter(comparator, *column_id, *preimage_range); + const auto new_node = generateOptimizedDateFilter(comparator, argument_node, *preimage_range); if (!new_node) return; @@ -128,20 +129,22 @@ public: } private: - QueryTreeNodePtr - generateOptimizedDateFilter(const String & comparator, const ColumnNode & column_node, const std::pair & range) const + QueryTreeNodePtr generateOptimizedDateFilter( + const String & comparator, const QueryTreeNodePtr & column_node, const std::pair & range) const { const DateLUTImpl & date_lut = DateLUT::instance("UTC"); String start_date_or_date_time; String
end_date_or_date_time; - if (isDateOrDate32(column_node.getColumnType().get())) + const auto & column_node_typed = column_node->as(); + const auto & column_type = column_node_typed.getColumnType().get(); + if (isDateOrDate32(column_type)) { start_date_or_date_time = date_lut.dateToString(range.first.get()); end_date_or_date_time = date_lut.dateToString(range.second.get()); } - else if (isDateTime(column_node.getColumnType().get()) || isDateTime64(column_node.getColumnType().get())) + else if (isDateTime(column_type) || isDateTime64(column_type)) { start_date_or_date_time = date_lut.timeToString(range.first.get()); end_date_or_date_time = date_lut.timeToString(range.second.get()); @@ -151,69 +154,29 @@ private: if (comparator == "equals") { - const auto lhs = std::make_shared("greaterOrEquals"); - lhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); - lhs->getArguments().getNodes().push_back(std::make_shared(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName()); - - const auto rhs = std::make_shared("less"); - rhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); - rhs->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); - - const auto new_date_filter = std::make_shared("and"); - new_date_filter->getArguments().getNodes() = {lhs, rhs}; - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); - - return new_date_filter; + return createFunctionNode( + "and", + createFunctionNode("greaterOrEquals", column_node, std::make_shared(start_date_or_date_time)), + createFunctionNode("less", column_node, std::make_shared(end_date_or_date_time))); } else if (comparator == "notEquals") { - const auto lhs = std::make_shared("less"); - lhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); - lhs->getArguments().getNodes().push_back(std::make_shared(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName()); - - const auto rhs = std::make_shared("greaterOrEquals"); - rhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); - rhs->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); - - const auto new_date_filter = std::make_shared("or"); - new_date_filter->getArguments().getNodes() = {lhs, rhs}; - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); - - return new_date_filter; + return createFunctionNode( + "or", + createFunctionNode("less", column_node, std::make_shared(start_date_or_date_time)), + createFunctionNode("greaterOrEquals", column_node, std::make_shared(end_date_or_date_time))); } else if (comparator == "greater") { - const auto new_date_filter = std::make_shared("greaterOrEquals"); - new_date_filter->getArguments().getNodes().push_back( - std::make_shared(column_node.getColumn(), column_node.getColumnSource())); - new_date_filter->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); - - return new_date_filter; + return createFunctionNode("greaterOrEquals", column_node, std::make_shared(end_date_or_date_time)); } else if (comparator == "lessOrEquals") { - 
const auto new_date_filter = std::make_shared("less"); - new_date_filter->getArguments().getNodes().push_back( - std::make_shared(column_node.getColumn(), column_node.getColumnSource())); - new_date_filter->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); - - return new_date_filter; + return createFunctionNode("less", column_node, std::make_shared(end_date_or_date_time)); } else if (comparator == "less" || comparator == "greaterOrEquals") { - const auto new_date_filter = std::make_shared(comparator); - new_date_filter->getArguments().getNodes().push_back( - std::make_shared(column_node.getColumn(), column_node.getColumnSource())); - new_date_filter->getArguments().getNodes().push_back(std::make_shared(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); - - return new_date_filter; + return createFunctionNode(comparator, column_node, std::make_shared(start_date_or_date_time)); } else [[unlikely]] { @@ -224,10 +187,17 @@ private: } } - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const + template + QueryTreeNodePtr createFunctionNode(const String & function_name, Args &&... args) const { auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); + const auto function_node = std::make_shared(function_name); + auto & new_arguments = function_node->getArguments().getNodes(); + new_arguments.reserve(sizeof...(args)); + (new_arguments.push_back(std::forward(args)), ...); + function_node->resolveAsFunction(function->build(function_node->getArgumentColumns())); + + return function_node; } }; diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 1f81ac54078..c62641ca05c 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -706,7 +707,10 @@ struct IdentifierResolveScope { subquery_depth = parent_scope->subquery_depth; context = parent_scope->context; + projection_mask_map = parent_scope->projection_mask_map; } + else + projection_mask_map = std::make_shared>(); if (auto * union_node = scope_node->as()) { @@ -718,6 +722,11 @@ struct IdentifierResolveScope group_by_use_nulls = context->getSettingsRef().group_by_use_nulls && (query_node->isGroupByWithGroupingSets() || query_node->isGroupByWithRollup() || query_node->isGroupByWithCube()); } + + if (context) + join_use_nulls = context->getSettingsRef().join_use_nulls; + else if (parent_scope) + join_use_nulls = parent_scope->join_use_nulls; } QueryTreeNodePtr scope_node; @@ -772,6 +781,8 @@ struct IdentifierResolveScope /// Apply nullability to aggregation keys bool group_by_use_nulls = false; + /// Join returns NULLs instead of default values + bool join_use_nulls = false; /// JOINs count size_t joins_count = 0; @@ -784,6 +795,9 @@ struct IdentifierResolveScope */ QueryTreeNodePtr expression_join_tree_node; + /// Node hash to mask id map + std::shared_ptr> projection_mask_map; + [[maybe_unused]] const IdentifierResolveScope * getNearestQueryScope() const { const IdentifierResolveScope * scope_to_check = this; @@ -1068,6 +1082,8 @@ private: class QueryAnalyzer { public: + explicit QueryAnalyzer(bool only_analyze_) : only_analyze(only_analyze_) {} + void
resolve(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression, ContextPtr context) { IdentifierResolveScope scope(node, nullptr /*parent_scope*/); @@ -1430,6 +1446,7 @@ private: /// Global scalar subquery to scalar value map std::unordered_map scalar_subquery_to_scalar_value; + const bool only_analyze; }; /// Utility functions implementation @@ -1977,80 +1994,96 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden auto interpreter = std::make_unique(node->toAST(), subquery_context, subquery_context->getViewSource(), options); auto io = interpreter->execute(); - PullingAsyncPipelineExecutor executor(io.pipeline); io.pipeline.setProgressCallback(context->getProgressCallback()); io.pipeline.setProcessListElement(context->getProcessListElement()); - Block block; - - while (block.rows() == 0 && executor.pull(block)) + if (only_analyze) { - } - - if (block.rows() == 0) - { - auto types = interpreter->getSampleBlock().getDataTypes(); - if (types.size() != 1) - types = {std::make_shared(types)}; - - auto & type = types[0]; - if (!type->isNullable()) + /// If query is only analyzed, then constants are not correct. + scalar_block = interpreter->getSampleBlock(); + for (auto & column : scalar_block) { - if (!type->canBeInsideNullable()) - throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, - "Scalar subquery returned empty result of type {} which cannot be Nullable", - type->getName()); - - type = makeNullable(type); + if (column.column->empty()) + { + auto mut_col = column.column->cloneEmpty(); + mut_col->insertDefault(); + column.column = std::move(mut_col); + } } - - auto scalar_column = type->createColumn(); - scalar_column->insert(Null()); - scalar_block.insert({std::move(scalar_column), type, "null"}); } else { - if (block.rows() != 1) - throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); + Block block; - Block tmp_block; - while (tmp_block.rows() == 0 && executor.pull(tmp_block)) + while (block.rows() == 0 && executor.pull(block)) { } - if (tmp_block.rows() != 0) - throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); - - block = materializeBlock(block); - size_t columns = block.columns(); - - if (columns == 1) + if (block.rows() == 0) { - auto & column = block.getByPosition(0); - /// Here we wrap type to nullable if we can. - /// It is needed cause if subquery return no rows, it's result will be Null. - /// In case of many columns, do not check it cause tuple can't be nullable. - if (!column.type->isNullable() && column.type->canBeInsideNullable()) + auto types = interpreter->getSampleBlock().getDataTypes(); + if (types.size() != 1) + types = {std::make_shared(types)}; + + auto & type = types[0]; + if (!type->isNullable()) { - column.type = makeNullable(column.type); - column.column = makeNullable(column.column); + if (!type->canBeInsideNullable()) + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, + "Scalar subquery returned empty result of type {} which cannot be Nullable", + type->getName()); + + type = makeNullable(type); } - scalar_block = block; + auto scalar_column = type->createColumn(); + scalar_column->insert(Null()); + scalar_block.insert({std::move(scalar_column), type, "null"}); } else { - /** Make unique column names for tuple. 
- * - * Example: SELECT (SELECT 2 AS x, x) - */ - makeUniqueColumnNamesInBlock(block); + if (block.rows() != 1) + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); - scalar_block.insert({ - ColumnTuple::create(block.getColumns()), - std::make_shared(block.getDataTypes(), block.getNames()), - "tuple"}); + Block tmp_block; + while (tmp_block.rows() == 0 && executor.pull(tmp_block)) + { + } + + if (tmp_block.rows() != 0) + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); + + block = materializeBlock(block); + size_t columns = block.columns(); + + if (columns == 1) + { + auto & column = block.getByPosition(0); + /// Here we wrap type to nullable if we can. + /// It is needed because if the subquery returns no rows, its result will be Null. + /// In case of many columns, do not check it because a tuple can't be nullable. + if (!column.type->isNullable() && column.type->canBeInsideNullable()) + { + column.type = makeNullable(column.type); + column.column = makeNullable(column.column); + } + + scalar_block = block; + } + else + { + /** Make unique column names for tuple. + * + * Example: SELECT (SELECT 2 AS x, x) + */ + makeUniqueColumnNamesInBlock(block); + + scalar_block.insert({ + ColumnTuple::create(block.getColumns()), + std::make_shared(block.getDataTypes(), block.getNames()), + "tuple"}); + } } } @@ -3286,7 +3319,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo QueryTreeNodePtr resolved_identifier; JoinKind join_kind = from_join_node.getKind(); - bool join_use_nulls = scope.context->getSettingsRef().join_use_nulls; /// If columns from left or right table were missed Object(Nullable('json')) subcolumns, they will be replaced /// to ConstantNode(NULL), which can't be cast to ColumnNode, so we resolve it here. @@ -3451,7 +3483,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo if (join_node_in_resolve_process || !resolved_identifier) return resolved_identifier; - if (join_use_nulls) + if (scope.join_use_nulls) { resolved_identifier = resolved_identifier->clone(); convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side); @@ -4439,7 +4471,7 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I else matched_expression_nodes_with_names = resolveUnqualifiedMatcher(matcher_node, scope); - if (scope.context->getSettingsRef().join_use_nulls) + if (scope.join_use_nulls) { /** If we are resolving matcher came from the result of JOIN and `join_use_nulls` is set, * we need to convert joined column type to Nullable.
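A quick behavioral note on the `join_use_nulls` rewiring above: the analyzer now reads the cached `scope.join_use_nulls` flag instead of fetching the setting from the query context at each resolution site; the intended semantics of the setting itself are unchanged. A minimal illustration (the table names here are hypothetical, not taken from this patch):

```sql
CREATE TABLE t1 (id UInt32) ENGINE = Memory;
CREATE TABLE t2 (id UInt32, v UInt32) ENGINE = Memory;
INSERT INTO t1 VALUES (1), (2);
INSERT INTO t2 VALUES (1, 10);

SET join_use_nulls = 1;
-- With the setting on, columns of the non-matched side become Nullable:
-- id = 2 has no match in t2, so v is NULL instead of the default 0.
SELECT id, t2.v FROM t1 LEFT JOIN t2 USING (id) ORDER BY id;
```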
@@ -5124,22 +5156,31 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi } /// Resolve function arguments - bool allow_table_expressions = is_special_function_in; auto arguments_projection_names = resolveExpressionNodeList(function_node_ptr->getArgumentsNode(), scope, true /*allow_lambda_expression*/, allow_table_expressions /*allow_table_expression*/); - if (function_node_ptr->toAST()->hasSecretParts()) + /// Mask arguments if needed + if (!scope.context->getSettingsRef().format_display_secrets_in_show_and_select) { - for (auto & argument : arguments_projection_names) + if (FunctionSecretArgumentsFinder::Result secret_arguments = FunctionSecretArgumentsFinderTreeNode(*function_node_ptr).getResult(); secret_arguments.count) { - SipHash hash; - hash.update(argument); - argument = getHexUIntLowercase(hash.get128()); + auto & argument_nodes = function_node_ptr->getArgumentsNode()->as().getNodes(); + + for (size_t n = secret_arguments.start; n < secret_arguments.start + secret_arguments.count; ++n) + { + if (auto * constant = argument_nodes[n]->as()) + { + auto mask = scope.projection_mask_map->insert({constant->getTreeHash(), scope.projection_mask_map->size() + 1}).first->second; + constant->setMaskId(mask); + arguments_projection_names[n] = "[HIDDEN id: " + std::to_string(mask) + "]"; + } + } } } + auto & function_node = *function_node_ptr; /// Replace right IN function argument if it is table or table function with subquery that read ordinary columns @@ -7559,8 +7600,22 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier } if (query_node_typed.getPrewhere()) + { + /** Expression in PREWHERE with JOIN should not be modified by join_use_nulls. + * Example: SELECT * FROM t1 JOIN t2 USING (id) PREWHERE a = 1 + * Column `a` should be resolved from table and should not change its type to Nullable. 
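+ * To achieve this, join_use_nulls and use_identifier_lookup_to_result_cache are temporarily set to false in the scope and restored after PREWHERE is resolved.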
+ */ + bool join_use_nulls = scope.join_use_nulls; + bool use_identifier_lookup_to_result_cache = scope.use_identifier_lookup_to_result_cache; + scope.join_use_nulls = false; + scope.use_identifier_lookup_to_result_cache = false; + resolveExpressionNode(query_node_typed.getPrewhere(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + scope.join_use_nulls = join_use_nulls; + scope.use_identifier_lookup_to_result_cache = use_identifier_lookup_to_result_cache; + } + if (query_node_typed.getWhere()) resolveExpressionNode(query_node_typed.getWhere(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); @@ -7750,13 +7805,16 @@ void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, Identifier } -QueryAnalysisPass::QueryAnalysisPass(QueryTreeNodePtr table_expression_) +QueryAnalysisPass::QueryAnalysisPass(QueryTreeNodePtr table_expression_, bool only_analyze_) : table_expression(std::move(table_expression_)) + , only_analyze(only_analyze_) {} +QueryAnalysisPass::QueryAnalysisPass(bool only_analyze_) : only_analyze(only_analyze_) {} + void QueryAnalysisPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { - QueryAnalyzer analyzer; + QueryAnalyzer analyzer(only_analyze); analyzer.resolve(query_tree_node, table_expression, context); createUniqueTableAliases(query_tree_node, table_expression, context); } diff --git a/src/Analyzer/Passes/QueryAnalysisPass.h b/src/Analyzer/Passes/QueryAnalysisPass.h index 5d335d3e712..8c746833eee 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.h +++ b/src/Analyzer/Passes/QueryAnalysisPass.h @@ -71,13 +71,13 @@ public: /** Construct query analysis pass for query or union analysis. * Available columns are extracted from query node join tree. */ - QueryAnalysisPass() = default; + explicit QueryAnalysisPass(bool only_analyze_ = false); /** Construct query analysis pass for expression or list of expressions analysis. * Available expression columns are extracted from table expression. * Table expression node must have query, union, table, table function type. 
*/ - explicit QueryAnalysisPass(QueryTreeNodePtr table_expression_); + QueryAnalysisPass(QueryTreeNodePtr table_expression_, bool only_analyze_ = false); String getName() override { @@ -93,6 +93,7 @@ public: private: QueryTreeNodePtr table_expression; + const bool only_analyze; }; } diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index 43bb534a44e..9c07884a464 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -246,9 +246,9 @@ void QueryTreePassManager::dump(WriteBuffer & buffer, size_t up_to_pass_index) } } -void addQueryTreePasses(QueryTreePassManager & manager) +void addQueryTreePasses(QueryTreePassManager & manager, bool only_analyze) { - manager.addPass(std::make_unique()); + manager.addPass(std::make_unique(only_analyze)); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); diff --git a/src/Analyzer/QueryTreePassManager.h b/src/Analyzer/QueryTreePassManager.h index 270563590ba..0a0d72a6698 100644 --- a/src/Analyzer/QueryTreePassManager.h +++ b/src/Analyzer/QueryTreePassManager.h @@ -47,6 +47,6 @@ private: std::vector passes; }; -void addQueryTreePasses(QueryTreePassManager & manager); +void addQueryTreePasses(QueryTreePassManager & manager, bool only_analyze = false); } diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 7c6f0c74b1a..1b4279ca9a7 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -486,7 +487,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context /// process_list_element_holder is used to make an element in ProcessList live while BACKUP is working asynchronously. auto process_list_element = context_in_use->getProcessListElement(); - scheduleFromThreadPool( + thread_pool.scheduleOrThrowOnError( [this, backup_query, backup_id, @@ -502,6 +503,8 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context BackupMutablePtr backup_async; try { + setThreadName("BackupWorker"); + CurrentThread::QueryScope query_scope(context_in_use); doBackup( backup_async, backup_query, @@ -517,8 +520,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context { on_exception(backup_async, backup_id, backup_name_for_logging, backup_settings, backup_coordination); } - }, - thread_pool, "BackupWorker"); + }); } else { @@ -864,7 +866,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt /// process_list_element_holder is used to make an element in ProcessList live while RESTORE is working asynchronously. 
auto process_list_element = context_in_use->getProcessListElement(); - scheduleFromThreadPool( + thread_pool.scheduleOrThrowOnError( [this, restore_query, restore_id, @@ -878,6 +880,8 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt { try { + setThreadName("RestorerWorker"); + CurrentThread::QueryScope query_scope(context_in_use); doRestore( restore_query, restore_id, @@ -891,9 +895,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt { on_exception(restore_id, backup_name_for_logging, restore_settings, restore_coordination); } - }, - thread_pool, - "RestoreWorker"); + }); } else { diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 5c22b6c6d3f..16911f97e84 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -115,7 +115,7 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati /// At the same time, I want clickhouse-local to always work, regardless. /// TODO: get rid of glibc, or replace getaddrinfo to c-ares. - compression = config.getBool("compression", host != "localhost" && !isLocalAddress(DNSResolver::instance().resolveHost(host))) + compression = config.getBool("compression", host != "localhost" && !isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(host).front())) ? Protocol::Compression::Enable : Protocol::Compression::Disable; timeouts = ConnectionTimeouts() diff --git a/src/Columns/ColumnUnique.cpp b/src/Columns/ColumnUnique.cpp new file mode 100644 index 00000000000..edfee69a752 --- /dev/null +++ b/src/Columns/ColumnUnique.cpp @@ -0,0 +1,25 @@ +#include + +namespace DB +{ + +/// Explicit template instantiations. +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; + +} diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 34f1ffc15cd..76bbbbacdbf 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -15,6 +15,8 @@ #include #include #include +#include "Columns/ColumnsDateTime.h" +#include "Columns/ColumnsNumber.h" #include #include @@ -736,4 +738,23 @@ UInt128 ColumnUnique::IncrementalHash::getHash(const ColumnType & co return cur_hash; } + +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; +extern template class ColumnUnique; + } diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 250a8b3fc49..5b5f5369d5e 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -202,10 +202,10 @@ DNSResolver::DNSResolver() : 
impl(std::make_unique()), log(ge Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) { - return pickAddress(resolveHostAll(host)); + return pickAddress(resolveHostAll(host)); // random order -> random pick } -DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) +DNSResolver::IPAddresses DNSResolver::resolveHostAllInOriginOrder(const std::string & host) { if (impl->disable_cache) return resolveIPAddressImpl(host); @@ -214,6 +214,13 @@ DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) return resolveIPAddressWithCache(impl->cache_host, host); } +DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) +{ + auto addresses = resolveHostAllInOriginOrder(host); + std::shuffle(addresses.begin(), addresses.end(), thread_local_rng); + return addresses; +} + Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_and_port) { if (impl->disable_cache) diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 27d81c9442a..e3030e51a96 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -34,6 +34,9 @@ public: Poco::Net::IPAddress resolveHost(const std::string & host); /// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolves all its IPs + /// resolveHostAllInOriginOrder returns addresses with the same order as system call returns it + IPAddresses resolveHostAllInOriginOrder(const std::string & host); + /// resolveHostAll returns addresses in random order IPAddresses resolveHostAll(const std::string & host); /// Accepts host names like 'example.com:port' or '127.0.0.1:port' or '[::1]:port' and resolves its IP and port diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 0e72b489ace..7bf66c0504a 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -3,13 +3,13 @@ #include #include #include +#include #include #include #include #include - #define DATE_SECONDS_PER_DAY 86400 /// Number of seconds in a day, 60 * 60 * 24 #define DATE_LUT_MIN_YEAR 1900 /// 1900 since majority of financial organizations consider 1900 as an initial year. @@ -280,9 +280,9 @@ private: static_assert(std::is_integral_v && std::is_integral_v); assert(divisor > 0); - if (likely(offset_is_whole_number_of_hours_during_epoch)) + if (offset_is_whole_number_of_hours_during_epoch) [[likely]] { - if (likely(x >= 0)) + if (x >= 0) [[likely]] return static_cast(x / divisor * divisor); /// Integer division for negative numbers rounds them towards zero (up). @@ -576,10 +576,10 @@ public: unsigned toSecond(Time t) const { - if (likely(offset_is_whole_number_of_minutes_during_epoch)) + if (offset_is_whole_number_of_minutes_during_epoch) [[likely]] { Time res = t % 60; - if (likely(res >= 0)) + if (res >= 0) [[likely]] return static_cast(res); return static_cast(res) + 60; } @@ -593,6 +593,30 @@ public: return time % 60; } + template + unsigned toMillisecond(const DateOrTime & datetime, Int64 scale_multiplier) const + { + constexpr Int64 millisecond_multiplier = 1'000; + constexpr Int64 microsecond_multiplier = 1'000 * millisecond_multiplier; + constexpr Int64 divider = microsecond_multiplier / millisecond_multiplier; + + auto components = DB::DecimalUtils::splitWithScaleMultiplier(datetime, scale_multiplier); + + if (datetime.value < 0 && components.fractional) + { + components.fractional = scale_multiplier + (components.whole ? 
Int64(-1) : Int64(1)) * components.fractional; + --components.whole; + } + Int64 fractional = components.fractional; + if (scale_multiplier > microsecond_multiplier) + fractional = fractional / (scale_multiplier / microsecond_multiplier); + else if (scale_multiplier < microsecond_multiplier) + fractional = fractional * (microsecond_multiplier / scale_multiplier); + + UInt16 millisecond = static_cast(fractional / divider); + return millisecond; + } + unsigned toMinute(Time t) const { if (t >= 0 && offset_is_whole_number_of_hours_during_epoch) @@ -1122,9 +1146,9 @@ public: DateOrTime toStartOfMinuteInterval(DateOrTime t, UInt64 minutes) const { Int64 divisor = 60 * minutes; - if (likely(offset_is_whole_number_of_minutes_during_epoch)) + if (offset_is_whole_number_of_minutes_during_epoch) [[likely]] { - if (likely(t >= 0)) + if (t >= 0) [[likely]] return static_cast(t / divisor * divisor); return static_cast((t + 1 - divisor) / divisor * divisor); } @@ -1339,7 +1363,7 @@ public: UInt8 saturateDayOfMonth(Int16 year, UInt8 month, UInt8 day_of_month) const { - if (likely(day_of_month <= 28)) + if (day_of_month <= 28) [[likely]] return day_of_month; UInt8 days_in_month = daysInMonth(year, month); diff --git a/src/Common/FieldVisitorConvertToNumber.cpp b/src/Common/FieldVisitorConvertToNumber.cpp new file mode 100644 index 00000000000..75b3fbfe02a --- /dev/null +++ b/src/Common/FieldVisitorConvertToNumber.cpp @@ -0,0 +1,23 @@ +#include +#include "base/Decimal.h" + +namespace DB +{ + +/// Explicit template instantiations. +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; +template class FieldVisitorConvertToNumber; + +} diff --git a/src/Common/FieldVisitorConvertToNumber.h b/src/Common/FieldVisitorConvertToNumber.h index 47a1e669969..86e03cb5d77 100644 --- a/src/Common/FieldVisitorConvertToNumber.h +++ b/src/Common/FieldVisitorConvertToNumber.h @@ -117,4 +117,19 @@ public: T operator() (const bool & x) const { return T(x); } }; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; +extern template class FieldVisitorConvertToNumber; + } diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 87fcf220ff0..052c059a72d 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -475,6 +475,7 @@ The server successfully detected this situation and will download merged part fr M(FileSegmentUseMicroseconds, "File segment use() 
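The new DateLUTImpl::toMillisecond() above works in three steps: split the DateTime64 into whole seconds and a sub-second fraction, fold a negative fraction back into [0, scale_multiplier), rescale the fraction to microseconds, then divide down to milliseconds. A standalone re-derivation of that arithmetic (a behavioral sketch, not the library code, which goes through DecimalUtils::splitWithScaleMultiplier):

```cpp
#include <cstdint>

// Milliseconds component of a DateTime64 value with the given scale
// multiplier (10^scale), for illustration only.
uint16_t millisecondPart(int64_t datetime64_value, int64_t scale_multiplier)
{
    constexpr int64_t microsecond_multiplier = 1'000'000;

    int64_t fractional = datetime64_value % scale_multiplier;
    if (datetime64_value < 0 && fractional)
        fractional += scale_multiplier;            // fold into [0, scale_multiplier)

    if (scale_multiplier > microsecond_multiplier)
        fractional /= scale_multiplier / microsecond_multiplier;   // e.g. ns -> us
    else if (scale_multiplier < microsecond_multiplier)
        fractional *= microsecond_multiplier / scale_multiplier;   // e.g. ms -> us

    return static_cast<uint16_t>(fractional / 1'000);              // us -> ms
}
// millisecondPart(1'700'000'000'123, 1'000) == 123   (DateTime64(3))
// millisecondPart(-1'500, 1'000)            == 500   (-1.5 s has a .5 s fraction)
```

The same file also migrates from the likely() macro to the standard C++20 [[likely]] attribute, which changes the annotation syntax but not the intent.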
time") \ M(FileSegmentRemoveMicroseconds, "File segment remove() time") \ M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \ + M(FileSegmentFailToIncreasePriority, "Number of times the priority was not increased due to a high contention on the cache lock") \ M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \ M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \ \ diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index 87c56909387..c30df0b6313 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -30,7 +30,7 @@ bool isLocalhost(const std::string & hostname) { try { - return isLocalAddress(DNSResolver::instance().resolveHost(hostname)); + return isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(hostname).front()); } catch (...) { diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 99a48d043d0..fceee63d4bb 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -186,6 +186,7 @@ class IColumn; \ M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \ M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \ + M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \ M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \ M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \ M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ @@ -597,6 +598,7 @@ class IColumn; M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ + M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. 
It guarantees correctness, because these features can't work together.", 0) \ M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ @@ -697,7 +699,7 @@ class IColumn; M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \ M(Bool, cloud_mode, false, "Only available in ClickHouse Cloud", 0) \ M(UInt64, cloud_mode_engine, 1, "Only available in ClickHouse Cloud", 0) \ - M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw'", 0) \ + M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw', 'none_only_active', 'throw_only_active', 'null_status_on_timeout_only_active'", 0) \ M(UInt64, distributed_ddl_entry_format_version, 5, "Compatibility version of distributed DDL (ON CLUSTER) queries", 0) \ \ M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 7b35c9fb239..2f1da7935e6 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -90,6 +90,8 @@ static std::map sett {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, + {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication in dependent materialized views cannot work together with async inserts."}, + {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, }}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 04e1d0a18c8..de30d6d8eb5 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -118,6 +118,7 @@ IMPLEMENT_SETTING_ENUM(DistributedDDLOutputMode, ErrorCodes::BAD_ARGUMENTS, {"null_status_on_timeout", DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT}, {"throw_only_active", DistributedDDLOutputMode::THROW_ONLY_ACTIVE}, {"null_status_on_timeout_only_active", 
DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE}, + {"none_only_active", DistributedDDLOutputMode::NONE_ONLY_ACTIVE}, {"never_throw", DistributedDDLOutputMode::NEVER_THROW}}) IMPLEMENT_SETTING_ENUM(StreamingHandleErrorMode, ErrorCodes::BAD_ARGUMENTS, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 0d0138e6246..22fcf0389d8 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -177,6 +177,7 @@ enum class DistributedDDLOutputMode NEVER_THROW, THROW_ONLY_ACTIVE, NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE, + NONE_ONLY_ACTIVE, }; DECLARE_SETTING_ENUM(DistributedDDLOutputMode) diff --git a/src/DataTypes/DataTypeDecimalBase.h b/src/DataTypes/DataTypeDecimalBase.h index adbe9c95b14..9887dfabcdb 100644 --- a/src/DataTypes/DataTypeDecimalBase.h +++ b/src/DataTypes/DataTypeDecimalBase.h @@ -207,4 +207,10 @@ inline DataTypePtr createDecimal(UInt64 precision_value, UInt64 scale_value) return std::make_shared>(precision_value, scale_value); } +extern template class DataTypeDecimalBase; +extern template class DataTypeDecimalBase; +extern template class DataTypeDecimalBase; +extern template class DataTypeDecimalBase; +extern template class DataTypeDecimalBase; + } diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index 7ad9f0b6fd8..77a7a3e7237 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -112,6 +112,256 @@ static DataTypePtr createExact(const ASTPtr & arguments) return createDecimal(precision, scale); } +template +requires (IsDataTypeDecimal && IsDataTypeDecimal) +ReturnType convertDecimalsImpl(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result) +{ + using FromFieldType = typename FromDataType::FieldType; + using ToFieldType = typename ToDataType::FieldType; + using MaxFieldType = std::conditional_t<(sizeof(FromFieldType) > sizeof(ToFieldType)), FromFieldType, ToFieldType>; + using MaxNativeType = typename MaxFieldType::NativeType; + + static constexpr bool throw_exception = std::is_same_v; + + MaxNativeType converted_value; + if (scale_to > scale_from) + { + converted_value = DecimalUtils::scaleMultiplier(scale_to - scale_from); + if (common::mulOverflow(static_cast(value.value), converted_value, converted_value)) + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow while multiplying {} by scale {}", + std::string(ToDataType::family_name), toString(value.value), toString(converted_value)); + else + return ReturnType(false); + } + } + else if (scale_to == scale_from) + { + converted_value = value.value; + } + else + { + converted_value = value.value / DecimalUtils::scaleMultiplier(scale_from - scale_to); + } + + if constexpr (sizeof(FromFieldType) > sizeof(ToFieldType)) + { + if (converted_value < std::numeric_limits::min() || + converted_value > std::numeric_limits::max()) + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow: {} is not in range ({}, {})", + std::string(ToDataType::family_name), toString(converted_value), + toString(std::numeric_limits::min()), + toString(std::numeric_limits::max())); + else + return ReturnType(false); + } + } + + result = static_cast(converted_value); + + return ReturnType(true); +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template void convertDecimalsImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename 
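convertDecimalsImpl() above is the workhorse of the decimal conversions: widening the scale multiplies by 10^(scale_to - scale_from) with an explicit overflow check, narrowing divides (truncating toward zero), and a final range check guards narrowing of the underlying integer type. A compact standalone illustration of the scale arithmetic, using plain int64_t instead of the Decimal wrapper types:

```cpp
#include <cstdint>
#include <limits>
#include <stdexcept>

int64_t rescaleDecimal(int64_t value, uint32_t scale_from, uint32_t scale_to)
{
    auto pow10 = [](uint32_t n) { int64_t p = 1; while (n--) p *= 10; return p; };

    if (scale_to > scale_from)
    {
        int64_t mult = pow10(scale_to - scale_from);
        if (value > std::numeric_limits<int64_t>::max() / mult
            || value < std::numeric_limits<int64_t>::min() / mult)
            throw std::overflow_error("Decimal convert overflow");
        return value * mult;
    }
    if (scale_to == scale_from)
        return value;
    return value / pow10(scale_from - scale_to);   // truncates toward zero
}
// rescaleDecimal(12345, 2, 4) == 1234500   (123.45   -> 123.4500)
// rescaleDecimal(12345, 4, 2) == 123       (  1.2345 ->   1.23, truncated)
```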
TO_DATA_TYPE::FieldType & result); \ + template bool convertDecimalsImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef DISPATCH + + +template +requires (IsDataTypeDecimal && IsDataTypeDecimal) +typename ToDataType::FieldType convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to) +{ + using ToFieldType = typename ToDataType::FieldType; + ToFieldType result; + + convertDecimalsImpl(value, scale_from, scale_to, result); + + return result; +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template typename TO_DATA_TYPE::FieldType convertDecimals(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef DISPATCH + + +template +requires (IsDataTypeDecimal && IsDataTypeDecimal) +bool tryConvertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result) +{ + return convertDecimalsImpl(value, scale_from, scale_to, result); +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template bool tryConvertDecimals(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef DISPATCH + + +template +requires (IsDataTypeDecimal && is_arithmetic_v) +ReturnType convertFromDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType & result) +{ + using FromFieldType = typename FromDataType::FieldType; + using ToFieldType = typename ToDataType::FieldType; + + return DecimalUtils::convertToImpl(value, scale, result); +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template void convertFromDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); \ + template bool convertFromDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_ARITHMETIC_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + + +template +requires (IsDataTypeDecimal && is_arithmetic_v) +inline typename ToDataType::FieldType convertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale) +{ + typename ToDataType::FieldType result; + + convertFromDecimalImpl(value, scale, result); + + return result; +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template typename TO_DATA_TYPE::FieldType convertFromDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_ARITHMETIC_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + + +template +requires (IsDataTypeDecimal && is_arithmetic_v) +inline bool tryConvertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result) +{ + return convertFromDecimalImpl(value, scale, result); +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template bool tryConvertFromDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType& result); +#define INVOKE(X) 
FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_ARITHMETIC_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + + +template +requires (is_arithmetic_v && IsDataTypeDecimal) +ReturnType convertToDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType & result) +{ + using FromFieldType = typename FromDataType::FieldType; + using ToFieldType = typename ToDataType::FieldType; + using ToNativeType = typename ToFieldType::NativeType; + + static constexpr bool throw_exception = std::is_same_v; + + if constexpr (std::is_floating_point_v) + { + if (!std::isfinite(value)) + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Cannot convert infinity or NaN to decimal", ToDataType::family_name); + else + return ReturnType(false); + } + + auto out = value * static_cast(DecimalUtils::scaleMultiplier(scale)); + + if (out <= static_cast(std::numeric_limits::min()) || + out >= static_cast(std::numeric_limits::max())) + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Float is out of Decimal range", ToDataType::family_name); + else + return ReturnType(false); + } + + result = static_cast(out); + return ReturnType(true); + } + else + { + if constexpr (is_big_int_v) + return ReturnType(convertDecimalsImpl, ToDataType, ReturnType>(static_cast(value), 0, scale, result)); + else if constexpr (std::is_same_v) + return ReturnType(convertDecimalsImpl, ToDataType, ReturnType>(static_cast(value), 0, scale, result)); + else + return ReturnType(convertDecimalsImpl, ToDataType, ReturnType>(static_cast(value), 0, scale, result)); + } +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template void convertToDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); \ + template bool convertToDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + + +template +requires (is_arithmetic_v && IsDataTypeDecimal) +inline typename ToDataType::FieldType convertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale) +{ + typename ToDataType::FieldType result; + convertToDecimalImpl(value, scale, result); + return result; +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template typename TO_DATA_TYPE::FieldType convertToDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale); +#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + + +template +requires (is_arithmetic_v && IsDataTypeDecimal) +inline bool tryConvertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result) +{ + return convertToDecimalImpl(value, scale, result); +} + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + template bool tryConvertToDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType& result); +#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + + +template +DataTypePtr createDecimalMaxPrecision(UInt64 scale) +{ + return std::make_shared>(DecimalUtils::max_precision, scale); +} + +template DataTypePtr createDecimalMaxPrecision(UInt64 scale); +template DataTypePtr 
createDecimalMaxPrecision(UInt64 scale); +template DataTypePtr createDecimalMaxPrecision(UInt64 scale); +template DataTypePtr createDecimalMaxPrecision(UInt64 scale); + +/// Explicit template instantiations. +template class DataTypeDecimal; +template class DataTypeDecimal; +template class DataTypeDecimal; +template class DataTypeDecimal; + void registerDataTypeDecimal(DataTypeFactory & factory) { factory.registerDataType("Decimal32", createExact, DataTypeFactory::CaseInsensitive); @@ -125,10 +375,4 @@ void registerDataTypeDecimal(DataTypeFactory & factory) factory.registerAlias("FIXED", "Decimal", DataTypeFactory::CaseInsensitive); } -/// Explicit template instantiations. -template class DataTypeDecimal; -template class DataTypeDecimal; -template class DataTypeDecimal; -template class DataTypeDecimal; - } diff --git a/src/DataTypes/DataTypesDecimal.h b/src/DataTypes/DataTypesDecimal.h index e2b433cbe2f..badefc4c75a 100644 --- a/src/DataTypes/DataTypesDecimal.h +++ b/src/DataTypes/DataTypesDecimal.h @@ -3,7 +3,11 @@ #include #include #include +#include +#include #include +#include +#include #include #include @@ -13,7 +17,6 @@ namespace DB namespace ErrorCodes { - extern const int DECIMAL_OVERFLOW; extern const int LOGICAL_ERROR; } @@ -99,171 +102,145 @@ inline UInt32 getDecimalScale(const DataTypeDecimal & data_type) return data_type.getScale(); } +#define FOR_EACH_DECIMAL_TYPE(M) \ + M(DataTypeDecimal) \ + M(DataTypeDateTime64) \ + M(DataTypeDecimal32) \ + M(DataTypeDecimal64) \ + M(DataTypeDecimal128) \ + M(DataTypeDecimal256) + +#define FOR_EACH_DECIMAL_TYPE_PASS(M, X) \ + M(DataTypeDecimal, X) \ + M(DataTypeDateTime64, X) \ + M(DataTypeDecimal32, X) \ + M(DataTypeDecimal64, X) \ + M(DataTypeDecimal128, X) \ + M(DataTypeDecimal256, X) + + template requires (IsDataTypeDecimal && IsDataTypeDecimal) -inline ReturnType convertDecimalsImpl(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result) -{ - using FromFieldType = typename FromDataType::FieldType; - using ToFieldType = typename ToDataType::FieldType; - using MaxFieldType = std::conditional_t<(sizeof(FromFieldType) > sizeof(ToFieldType)), FromFieldType, ToFieldType>; - using MaxNativeType = typename MaxFieldType::NativeType; +ReturnType convertDecimalsImpl(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result); - static constexpr bool throw_exception = std::is_same_v; +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template void convertDecimalsImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result); \ + extern template bool convertDecimalsImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH - MaxNativeType converted_value; - if (scale_to > scale_from) - { - converted_value = DecimalUtils::scaleMultiplier(scale_to - scale_from); - if (common::mulOverflow(static_cast(value.value), converted_value, converted_value)) - { - if constexpr (throw_exception) - throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow while multiplying {} by scale {}", - std::string(ToDataType::family_name), toString(value.value), toString(converted_value)); - else - return ReturnType(false); - } - } - else if (scale_to == 
scale_from) - { - converted_value = value.value; - } - else - { - converted_value = value.value / DecimalUtils::scaleMultiplier(scale_from - scale_to); - } - - if constexpr (sizeof(FromFieldType) > sizeof(ToFieldType)) - { - if (converted_value < std::numeric_limits::min() || - converted_value > std::numeric_limits::max()) - { - if constexpr (throw_exception) - throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow: {} is not in range ({}, {})", - std::string(ToDataType::family_name), toString(converted_value), - toString(std::numeric_limits::min()), - toString(std::numeric_limits::max())); - else - return ReturnType(false); - } - } - - result = static_cast(converted_value); - - return ReturnType(true); -} template requires (IsDataTypeDecimal && IsDataTypeDecimal) -inline typename ToDataType::FieldType convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to) -{ - using ToFieldType = typename ToDataType::FieldType; - ToFieldType result; +typename ToDataType::FieldType convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to); - convertDecimalsImpl(value, scale_from, scale_to, result); +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template typename TO_DATA_TYPE::FieldType convertDecimals(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH - return result; -} template requires (IsDataTypeDecimal && IsDataTypeDecimal) -inline bool tryConvertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result) -{ - return convertDecimalsImpl(value, scale_from, scale_to, result); -} +bool tryConvertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result); + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template bool tryConvertDecimals(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + template requires (IsDataTypeDecimal && is_arithmetic_v) -inline ReturnType convertFromDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result) -{ - using FromFieldType = typename FromDataType::FieldType; - using ToFieldType = typename ToDataType::FieldType; +ReturnType convertFromDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType & result); + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template void convertFromDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); \ + extern template bool convertFromDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_ARITHMETIC_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH - return DecimalUtils::convertToImpl(value, scale, result); -} template requires (IsDataTypeDecimal && is_arithmetic_v) -inline typename ToDataType::FieldType convertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale) -{ - typename ToDataType::FieldType 
result; +typename ToDataType::FieldType convertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale); - convertFromDecimalImpl(value, scale, result); +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template typename TO_DATA_TYPE::FieldType convertFromDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_ARITHMETIC_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH - return result; -} template requires (IsDataTypeDecimal && is_arithmetic_v) -inline bool tryConvertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result) -{ - return convertFromDecimalImpl(value, scale, result); -} +bool tryConvertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result); + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template bool tryConvertFromDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType& result); +#define INVOKE(X) FOR_EACH_DECIMAL_TYPE_PASS(DISPATCH, X) +FOR_EACH_ARITHMETIC_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + template requires (is_arithmetic_v && IsDataTypeDecimal) -inline ReturnType convertToDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result) -{ - using FromFieldType = typename FromDataType::FieldType; - using ToFieldType = typename ToDataType::FieldType; - using ToNativeType = typename ToFieldType::NativeType; +ReturnType convertToDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result); - static constexpr bool throw_exception = std::is_same_v; +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template void convertToDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); \ + extern template bool convertToDecimalImpl(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType & result); +#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH - if constexpr (std::is_floating_point_v) - { - if (!std::isfinite(value)) - { - if constexpr (throw_exception) - throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Cannot convert infinity or NaN to decimal", ToDataType::family_name); - else - return ReturnType(false); - } - - auto out = value * static_cast(DecimalUtils::scaleMultiplier(scale)); - - if (out <= static_cast(std::numeric_limits::min()) || - out >= static_cast(std::numeric_limits::max())) - { - if constexpr (throw_exception) - throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. 
Float is out of Decimal range", ToDataType::family_name); - else - return ReturnType(false); - } - - result = static_cast(out); - return ReturnType(true); - } - else - { - if constexpr (is_big_int_v) - return ReturnType(convertDecimalsImpl, ToDataType, ReturnType>(static_cast(value), 0, scale, result)); - else if constexpr (std::is_same_v) - return ReturnType(convertDecimalsImpl, ToDataType, ReturnType>(static_cast(value), 0, scale, result)); - else - return ReturnType(convertDecimalsImpl, ToDataType, ReturnType>(static_cast(value), 0, scale, result)); - } -} template requires (is_arithmetic_v && IsDataTypeDecimal) -inline typename ToDataType::FieldType convertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale) -{ - typename ToDataType::FieldType result; - convertToDecimalImpl(value, scale, result); - return result; -} +typename ToDataType::FieldType convertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale); + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template typename TO_DATA_TYPE::FieldType convertToDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale); +#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + template requires (is_arithmetic_v && IsDataTypeDecimal) -inline bool tryConvertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result) -{ - return convertToDecimalImpl(value, scale, result); -} +bool tryConvertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result); + +#define DISPATCH(FROM_DATA_TYPE, TO_DATA_TYPE) \ + extern template bool tryConvertToDecimal(const typename FROM_DATA_TYPE::FieldType & value, UInt32 scale, typename TO_DATA_TYPE::FieldType& result); +#define INVOKE(X) FOR_EACH_ARITHMETIC_TYPE_PASS(DISPATCH, X) +FOR_EACH_DECIMAL_TYPE(INVOKE); +#undef INVOKE +#undef DISPATCH + template -inline DataTypePtr createDecimalMaxPrecision(UInt64 scale) -{ - return std::make_shared>(DecimalUtils::max_precision, scale); -} +DataTypePtr createDecimalMaxPrecision(UInt64 scale); + +extern template DataTypePtr createDecimalMaxPrecision(UInt64 scale); +extern template DataTypePtr createDecimalMaxPrecision(UInt64 scale); +extern template DataTypePtr createDecimalMaxPrecision(UInt64 scale); +extern template DataTypePtr createDecimalMaxPrecision(UInt64 scale); + +extern template class DataTypeDecimal; +extern template class DataTypeDecimal; +extern template class DataTypeDecimal; +extern template class DataTypeDecimal; } diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index 008fa287064..99446d24eed 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -102,4 +102,21 @@ void registerDataTypeNumbers(DataTypeFactory & factory) factory.registerAlias("DOUBLE PRECISION", "Float64", DataTypeFactory::CaseInsensitive); } +/// Explicit template instantiations. 
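A recurring pattern across this patch (ColumnUnique, FieldVisitorConvertToNumber, DataTypeDecimalBase, DataTypeDecimal, DataTypeNumber, SerializationDecimalBase): template definitions stay available, one .cpp file provides explicit instantiations, and the header adds matching extern template declarations so every other translation unit stops re-instantiating the same specializations. A minimal sketch of the mechanics, with illustrative names:

```cpp
// widget.h
template <typename T>
struct Widget
{
    T transform(T value) const { return value + value; }  // definition stays visible
};

// Users of widget.h must not instantiate these; each is compiled exactly once.
extern template struct Widget<int>;
extern template struct Widget<double>;

// widget.cpp (includes widget.h)
template struct Widget<int>;      // the single explicit instantiation
template struct Widget<double>;
```

The FOR_EACH_DECIMAL_TYPE / FOR_EACH_ARITHMETIC_TYPE dispatch macros scale this up mechanically to every From/To type pair, trading some macro noise for faster builds and less duplicated object code.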
+template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; + +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; +template class DataTypeNumber; + } diff --git a/src/DataTypes/DataTypesNumber.h b/src/DataTypes/DataTypesNumber.h index 0c1f88a7925..d550ceababc 100644 --- a/src/DataTypes/DataTypesNumber.h +++ b/src/DataTypes/DataTypesNumber.h @@ -55,6 +55,22 @@ private: bool unsigned_can_be_signed = false; }; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; + +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; +extern template class DataTypeNumber; + using DataTypeUInt8 = DataTypeNumber; using DataTypeUInt16 = DataTypeNumber; using DataTypeUInt32 = DataTypeNumber; diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 392c56343e3..40915418aea 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -267,4 +267,91 @@ SerializationPtr IDataType::getSerialization(const NameAndTypePair & column) return column.type->getDefaultSerialization(); } +#define FOR_TYPES_OF_TYPE(M) \ + M(TypeIndex) \ + M(const IDataType &) \ + M(const DataTypePtr &) \ + M(WhichDataType) + +#define DISPATCH(TYPE) \ +bool isUInt8(TYPE data_type) { return WhichDataType(data_type).isUInt8(); } \ +bool isUInt16(TYPE data_type) { return WhichDataType(data_type).isUInt16(); } \ +bool isUInt32(TYPE data_type) { return WhichDataType(data_type).isUInt32(); } \ +bool isUInt64(TYPE data_type) { return WhichDataType(data_type).isUInt64(); } \ +bool isNativeUInt(TYPE data_type) { return WhichDataType(data_type).isNativeUInt(); } \ +bool isUInt(TYPE data_type) { return WhichDataType(data_type).isUInt(); } \ +\ +bool isInt8(TYPE data_type) { return WhichDataType(data_type).isInt8(); } \ +bool isInt16(TYPE data_type) { return WhichDataType(data_type).isInt16(); } \ +bool isInt32(TYPE data_type) { return WhichDataType(data_type).isInt32(); } \ +bool isInt64(TYPE data_type) { return WhichDataType(data_type).isInt64(); } \ +bool isNativeInt(TYPE data_type) { return WhichDataType(data_type).isNativeInt(); } \ +bool isInt(TYPE data_type) { return WhichDataType(data_type).isInt(); } \ +\ +bool isInteger(TYPE data_type) { return WhichDataType(data_type).isInteger(); } \ +bool isNativeInteger(TYPE data_type) { return WhichDataType(data_type).isNativeInteger(); } \ +\ +bool isDecimal(TYPE data_type) { return WhichDataType(data_type).isDecimal(); } \ +\ +bool isFloat(TYPE data_type) { return WhichDataType(data_type).isFloat(); } \ +\ +bool isNativeNumber(TYPE data_type) { return WhichDataType(data_type).isNativeNumber(); } \ +bool isNumber(TYPE data_type) { return WhichDataType(data_type).isNumber(); } \ +\ +bool isEnum8(TYPE data_type) { return WhichDataType(data_type).isEnum8(); } \ +bool isEnum16(TYPE data_type) { return WhichDataType(data_type).isEnum16(); } \ +bool isEnum(TYPE data_type) { return 
WhichDataType(data_type).isEnum(); } \ +\ +bool isDate(TYPE data_type) { return WhichDataType(data_type).isDate(); } \ +bool isDate32(TYPE data_type) { return WhichDataType(data_type).isDate32(); } \ +bool isDateOrDate32(TYPE data_type) { return WhichDataType(data_type).isDateOrDate32(); } \ +bool isDateTime(TYPE data_type) { return WhichDataType(data_type).isDateTime(); } \ +bool isDateTime64(TYPE data_type) { return WhichDataType(data_type).isDateTime64(); } \ +bool isDateTimeOrDateTime64(TYPE data_type) { return WhichDataType(data_type).isDateTimeOrDateTime64(); } \ +bool isDateOrDate32OrDateTimeOrDateTime64(TYPE data_type) { return WhichDataType(data_type).isDateOrDate32OrDateTimeOrDateTime64(); } \ +\ +bool isString(TYPE data_type) { return WhichDataType(data_type).isString(); } \ +bool isFixedString(TYPE data_type) { return WhichDataType(data_type).isFixedString(); } \ +bool isStringOrFixedString(TYPE data_type) { return WhichDataType(data_type).isStringOrFixedString(); } \ +\ +bool isUUID(TYPE data_type) { return WhichDataType(data_type).isUUID(); } \ +bool isIPv4(TYPE data_type) { return WhichDataType(data_type).isIPv4(); } \ +bool isIPv6(TYPE data_type) { return WhichDataType(data_type).isIPv6(); } \ +bool isArray(TYPE data_type) { return WhichDataType(data_type).isArray(); } \ +bool isTuple(TYPE data_type) { return WhichDataType(data_type).isTuple(); } \ +bool isMap(TYPE data_type) {return WhichDataType(data_type).isMap(); } \ +bool isInterval(TYPE data_type) {return WhichDataType(data_type).isInterval(); } \ +bool isObject(TYPE data_type) { return WhichDataType(data_type).isObject(); } \ +bool isVariant(TYPE data_type) { return WhichDataType(data_type).isVariant(); } \ +bool isNothing(TYPE data_type) { return WhichDataType(data_type).isNothing(); } \ +\ +bool isColumnedAsNumber(TYPE data_type) \ +{ \ + WhichDataType which(data_type); \ + return which.isInteger() || which.isFloat() || which.isDateOrDate32OrDateTimeOrDateTime64() || which.isUUID() || which.isIPv4() || which.isIPv6(); \ +} \ +\ +bool isColumnedAsDecimal(TYPE data_type) \ +{ \ + WhichDataType which(data_type); \ + return which.isDecimal() || which.isDateTime64(); \ +} \ +\ +bool isNotCreatable(TYPE data_type) \ +{ \ + WhichDataType which(data_type); \ + return which.isNothing() || which.isFunction() || which.isSet(); \ +} \ +\ +bool isNotDecimalButComparableToDecimal(TYPE data_type) \ +{ \ + WhichDataType which(data_type); \ + return which.isInt() || which.isUInt() || which.isFloat(); \ +} \ + +FOR_TYPES_OF_TYPE(DISPATCH) + +#undef DISPATCH +#undef FOR_TYPES_OF_TYPE + } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 220658afda5..55f584ef1e0 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -424,71 +424,76 @@ struct WhichDataType /// IDataType helpers (alternative for IDataType virtual methods with single point of truth) -template inline bool isUInt8(const T & data_type) { return WhichDataType(data_type).isUInt8(); } -template inline bool isUInt16(const T & data_type) { return WhichDataType(data_type).isUInt16(); } -template inline bool isUInt32(const T & data_type) { return WhichDataType(data_type).isUInt32(); } -template inline bool isUInt64(const T & data_type) { return WhichDataType(data_type).isUInt64(); } -template inline bool isNativeUInt(const T & data_type) { return WhichDataType(data_type).isNativeUInt(); } -template inline bool isUInt(const T & data_type) { return WhichDataType(data_type).isUInt(); } +#define FOR_TYPES_OF_TYPE(M) \ + M(TypeIndex) \ + 
M(const IDataType &) \ + M(const DataTypePtr &) \ + M(WhichDataType) -template inline bool isInt8(const T & data_type) { return WhichDataType(data_type).isInt8(); } -template inline bool isInt16(const T & data_type) { return WhichDataType(data_type).isInt16(); } -template inline bool isInt32(const T & data_type) { return WhichDataType(data_type).isInt32(); } -template inline bool isInt64(const T & data_type) { return WhichDataType(data_type).isInt64(); } -template inline bool isNativeInt(const T & data_type) { return WhichDataType(data_type).isNativeInt(); } -template inline bool isInt(const T & data_type) { return WhichDataType(data_type).isInt(); } +#define DISPATCH(TYPE) \ +bool isUInt8(TYPE data_type); \ +bool isUInt16(TYPE data_type); \ +bool isUInt32(TYPE data_type); \ +bool isUInt64(TYPE data_type); \ +bool isNativeUInt(TYPE data_type); \ +bool isUInt(TYPE data_type); \ +\ +bool isInt8(TYPE data_type); \ +bool isInt16(TYPE data_type); \ +bool isInt32(TYPE data_type); \ +bool isInt64(TYPE data_type); \ +bool isNativeInt(TYPE data_type); \ +bool isInt(TYPE data_type); \ +\ +bool isInteger(TYPE data_type); \ +bool isNativeInteger(TYPE data_type); \ +\ +bool isDecimal(TYPE data_type); \ +\ +bool isFloat(TYPE data_type); \ +\ +bool isNativeNumber(TYPE data_type); \ +bool isNumber(TYPE data_type); \ +\ +bool isEnum8(TYPE data_type); \ +bool isEnum16(TYPE data_type); \ +bool isEnum(TYPE data_type); \ +\ +bool isDate(TYPE data_type); \ +bool isDate32(TYPE data_type); \ +bool isDateOrDate32(TYPE data_type); \ +bool isDateTime(TYPE data_type); \ +bool isDateTime64(TYPE data_type); \ +bool isDateTimeOrDateTime64(TYPE data_type); \ +bool isDateOrDate32OrDateTimeOrDateTime64(TYPE data_type); \ +\ +bool isString(TYPE data_type); \ +bool isFixedString(TYPE data_type); \ +bool isStringOrFixedString(TYPE data_type); \ +\ +bool isUUID(TYPE data_type); \ +bool isIPv4(TYPE data_type); \ +bool isIPv6(TYPE data_type); \ +bool isArray(TYPE data_type); \ +bool isTuple(TYPE data_type); \ +bool isMap(TYPE data_type); \ +bool isInterval(TYPE data_type); \ +bool isObject(TYPE data_type); \ +bool isVariant(TYPE data_type); \ +bool isNothing(TYPE data_type); \ +\ +bool isColumnedAsNumber(TYPE data_type); \ +\ +bool isColumnedAsDecimal(TYPE data_type); \ +\ +bool isNotCreatable(TYPE data_type); \ +\ +bool isNotDecimalButComparableToDecimal(TYPE data_type); \ -template inline bool isInteger(const T & data_type) { return WhichDataType(data_type).isInteger(); } -template inline bool isNativeInteger(const T & data_type) { return WhichDataType(data_type).isNativeInteger(); } +FOR_TYPES_OF_TYPE(DISPATCH) -template inline bool isDecimal(const T & data_type) { return WhichDataType(data_type).isDecimal(); } - -template inline bool isFloat(const T & data_type) { return WhichDataType(data_type).isFloat(); } - -template inline bool isNativeNumber(const T & data_type) { return WhichDataType(data_type).isNativeNumber(); } -template inline bool isNumber(const T & data_type) { return WhichDataType(data_type).isNumber(); } - -template inline bool isEnum8(const T & data_type) { return WhichDataType(data_type).isEnum8(); } -template inline bool isEnum16(const T & data_type) { return WhichDataType(data_type).isEnum16(); } -template inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); } - -template inline bool isDate(const T & data_type) { return WhichDataType(data_type).isDate(); } -template inline bool isDate32(const T & data_type) { return WhichDataType(data_type).isDate32(); } -template inline bool 
isDateOrDate32(const T & data_type) { return WhichDataType(data_type).isDateOrDate32(); } -template inline bool isDateTime(const T & data_type) { return WhichDataType(data_type).isDateTime(); } -template inline bool isDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTime64(); } -template inline bool isDateTimeOrDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTimeOrDateTime64(); } -template inline bool isDateOrDate32OrDateTimeOrDateTime64(const T & data_type) { return WhichDataType(data_type).isDateOrDate32OrDateTimeOrDateTime64(); } - -template inline bool isString(const T & data_type) { return WhichDataType(data_type).isString(); } -template inline bool isFixedString(const T & data_type) { return WhichDataType(data_type).isFixedString(); } -template inline bool isStringOrFixedString(const T & data_type) { return WhichDataType(data_type).isStringOrFixedString(); } - -template inline bool isUUID(const T & data_type) { return WhichDataType(data_type).isUUID(); } -template inline bool isIPv4(const T & data_type) { return WhichDataType(data_type).isIPv4(); } -template inline bool isIPv6(const T & data_type) { return WhichDataType(data_type).isIPv6(); } -template inline bool isArray(const T & data_type) { return WhichDataType(data_type).isArray(); } -template inline bool isTuple(const T & data_type) { return WhichDataType(data_type).isTuple(); } -template inline bool isMap(const T & data_type) {return WhichDataType(data_type).isMap(); } -template inline bool isInterval(const T & data_type) {return WhichDataType(data_type).isInterval(); } -template inline bool isObject(const T & data_type) { return WhichDataType(data_type).isObject(); } -template inline bool isVariant(const T & data_type) { return WhichDataType(data_type).isVariant(); } - -template inline bool isNothing(const T & data_type) { return WhichDataType(data_type).isNothing(); } - -template -inline bool isColumnedAsNumber(const T & data_type) -{ - WhichDataType which(data_type); - return which.isInteger() || which.isFloat() || which.isDateOrDate32OrDateTimeOrDateTime64() || which.isUUID() || which.isIPv4() || which.isIPv6(); -} - -template -inline bool isColumnedAsDecimal(const T & data_type) -{ - WhichDataType which(data_type); - return which.isDecimal() || which.isDateTime64(); -} +#undef DISPATCH +#undef FOR_TYPES_OF_TYPE // Same as isColumnedAsDecimal but also checks value type of underlyig column. 
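The IDataType.h rewrite above replaces one inline function template per predicate with four plain overloads, generated by the FOR_TYPES_OF_TYPE/DISPATCH macros and defined out of line in IDataType.cpp. Expanded by hand for a single predicate, the header now effectively declares (sketch):

```cpp
bool isUInt8(TypeIndex data_type);
bool isUInt8(const IDataType & data_type);
bool isUInt8(const DataTypePtr & data_type);
bool isUInt8(WhichDataType data_type);
```

One trade-off: an expression like &isUInt8 now names an overload set rather than a single function, so code that stores these predicates as function pointers must disambiguate, which is what the FunctionBase64Conversion change at the end of this patch does.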
template @@ -498,19 +503,6 @@ inline bool isColumnedAsDecimalT(const DataType & data_type) return (which.isDecimal() || which.isDateTime64()) && which.idx == TypeToTypeIndex; } -template -inline bool isNotCreatable(const T & data_type) -{ - WhichDataType which(data_type); - return which.isNothing() || which.isFunction() || which.isSet(); -} - -inline bool isNotDecimalButComparableToDecimal(const DataTypePtr & data_type) -{ - WhichDataType which(data_type); - return which.isInt() || which.isUInt() || which.isFloat(); -} - inline bool isBool(const DataTypePtr & data_type) { return data_type->getName() == "Bool"; diff --git a/src/DataTypes/Serializations/SerializationDecimalBase.h b/src/DataTypes/Serializations/SerializationDecimalBase.h index 08f963cedbb..5676280d34b 100644 --- a/src/DataTypes/Serializations/SerializationDecimalBase.h +++ b/src/DataTypes/Serializations/SerializationDecimalBase.h @@ -29,4 +29,10 @@ public: void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; }; +extern template class SerializationDecimalBase; +extern template class SerializationDecimalBase; +extern template class SerializationDecimalBase; +extern template class SerializationDecimalBase; +extern template class SerializationDecimalBase; + } diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 56d42a38c8e..d09f402143e 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -1078,7 +1078,7 @@ void HashedArrayDictionary::calculateBytesAllocate bytes_allocated += container.allocated_bytes(); } - bucket_count = container.capacity(); + bucket_count += container.capacity(); } }; @@ -1089,6 +1089,13 @@ void HashedArrayDictionary::calculateBytesAllocate bytes_allocated += container.size(); } + /// `bucket_count` should be a sum over all shards, + /// but it should not be a sum over all attributes, since it is used to + /// calculate load_factor like this: `element_count / bucket_count` + /// While element_count is a sum over all shards, not over all attributes. 
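To make the bucket_count comment above concrete, a worked example with made-up numbers:

```cpp
// 2 attributes, 4 shards, each shard's container: capacity 1024, 800 elements.
//   element_count = 4 * 800           = 3200   (summed over shards only)
//   bucket_count  = 2 * 4 * 1024      = 8192   (summed over shards AND attributes)
//   bucket_count /= attributes.size() -> 4096  (back to a per-attribute total)
//   load_factor = element_count / bucket_count = 3200 / 4096 ~= 0.78
// Without the division, load_factor would be understated as 3200 / 8192 ~= 0.39.
```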
+ if (attributes.size()) + bucket_count /= attributes.size(); + if (update_field_loaded_block) bytes_allocated += update_field_loaded_block->allocatedBytes(); @@ -1167,17 +1174,24 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory) if (shards <= 0 || 128 < shards) throw Exception(ErrorCodes::BAD_ARGUMENTS,"{}: SHARDS parameter should be within [1, 128]", full_name); - HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime, static_cast(shards)}; + Int64 shard_load_queue_backlog = config.getInt(config_prefix + dictionary_layout_prefix + ".shard_load_queue_backlog", 10000); + if (shard_load_queue_backlog <= 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: SHARD_LOAD_QUEUE_BACKLOG parameter should be greater than zero", full_name); if (source_ptr->hasUpdateField() && shards > 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: SHARDS parameter does not supports for updatable source (UPDATE_FIELD)", full_name); + HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime, static_cast(shards), static_cast(shard_load_queue_backlog)}; + ContextMutablePtr context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); const auto & settings = context->getSettingsRef(); const auto * clickhouse_source = dynamic_cast(source_ptr.get()); configuration.use_async_executor = clickhouse_source && clickhouse_source->isLocal() && settings.dictionary_use_async_executor; + if (settings.max_execution_time.totalSeconds() > 0) + configuration.load_timeout = std::chrono::seconds(settings.max_execution_time.totalSeconds()); + if (dictionary_key_type == DictionaryKeyType::Simple) { if (shards > 1) diff --git a/src/Dictionaries/HashedArrayDictionary.h b/src/Dictionaries/HashedArrayDictionary.h index 4b2570ad928..9877d92d457 100644 --- a/src/Dictionaries/HashedArrayDictionary.h +++ b/src/Dictionaries/HashedArrayDictionary.h @@ -29,6 +29,7 @@ struct HashedArrayDictionaryStorageConfiguration size_t shards = 1; size_t shard_load_queue_backlog = 10000; bool use_async_executor = false; + std::chrono::seconds load_timeout{0}; }; template diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 3a5e4ff6306..b3b8cc56868 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -67,6 +67,7 @@ struct HashedDictionaryConfiguration const bool require_nonempty; const DictionaryLifetime lifetime; bool use_async_executor = false; + const std::chrono::seconds load_timeout{0}; }; template diff --git a/src/Dictionaries/HashedDictionaryParallelLoader.h b/src/Dictionaries/HashedDictionaryParallelLoader.h index a256f6de0e0..d88ee88f9a9 100644 --- a/src/Dictionaries/HashedDictionaryParallelLoader.h +++ b/src/Dictionaries/HashedDictionaryParallelLoader.h @@ -31,6 +31,7 @@ template clas namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int TIMEOUT_EXCEEDED; } } @@ -50,9 +51,10 @@ public: , shards(dictionary.configuration.shards) , pool(CurrentMetrics::HashedDictionaryThreads, CurrentMetrics::HashedDictionaryThreadsActive, CurrentMetrics::HashedDictionaryThreadsScheduled, shards) , shards_queues(shards) + , loading_timeout(dictionary.configuration.load_timeout) { UInt64 backlog = dictionary.configuration.shard_load_queue_backlog; - LOG_TRACE(dictionary.log, "Will load the {} dictionary using {} threads (with {} backlog)", dictionary_name, shards, backlog); + LOG_TRACE(dictionary.log, "Will load the {} dictionary using {} threads (with {} 
backlog and timeout {} sec)", dictionary_name, shards, backlog, loading_timeout.count()); shards_slots.resize(shards); iota(shards_slots.data(), shards_slots.size(), UInt64(0)); @@ -62,7 +64,11 @@ public: shards_queues[shard].emplace(backlog); pool.scheduleOrThrowOnError([this, shard, thread_group = CurrentThread::getGroup()] { + WorkerStatistic statistic; SCOPE_EXIT_SAFE( + LOG_TRACE(dictionary.log, "Finished worker for dictionary {} shard {}, processed {} blocks, {} rows, total time {}ms", + dictionary_name, shard, statistic.total_blocks, statistic.total_rows, statistic.total_elapsed_ms); + if (thread_group) CurrentThread::detachFromGroupIfNotDetached(); ); @@ -74,7 +80,9 @@ public: CurrentThread::attachToGroupIfDetached(thread_group); setThreadName("HashedDictLoad"); - threadWorker(shard); + LOG_TRACE(dictionary.log, "Starting worker for dictionary {}, shard {}", dictionary_name, shard); + + threadWorker(shard, statistic); }); } } @@ -87,8 +95,28 @@ public: for (size_t shard = 0; shard < shards; ++shard) { - if (!shards_queues[shard]->push(std::move(shards_blocks[shard]))) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to shards queue #{}", shard); + const auto & current_block = shards_blocks[shard]; + while (!shards_queues[shard]->tryPush(current_block, /* milliseconds= */ 100)) + { + if (shards_queues[shard]->isFinished()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to finished shards queue #{}, dictionary {}", shard, dictionary_name); + + /// We need to check if some workers failed + if (pool.active() != shards) + { + LOG_DEBUG(dictionary.log, "Some workers for dictionary {} failed, stopping all workers", dictionary_name); + stop_all_workers = true; + pool.wait(); /// We expect exception to be thrown from the failed worker thread + throw Exception(ErrorCodes::LOGICAL_ERROR, "Worker threads for dictionary {} are not active", dictionary_name); + } + + if (loading_timeout.count() && std::chrono::milliseconds(total_loading_time.elapsedMilliseconds()) > loading_timeout) + { + stop_all_workers = true; + pool.wait(); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout {} sec for dictionary {} loading is expired", loading_timeout.count(), dictionary_name); + } + } } } @@ -124,27 +152,49 @@ private: String dictionary_name; const size_t shards; ThreadPool pool; + std::atomic_bool stop_all_workers{false}; std::vector>> shards_queues; + std::chrono::seconds loading_timeout; + Stopwatch total_loading_time; + std::vector shards_slots; DictionaryKeysArenaHolder arena_holder; - void threadWorker(size_t shard) + struct WorkerStatistic + { + UInt64 total_elapsed_ms = 0; + UInt64 total_blocks = 0; + UInt64 total_rows = 0; + }; + + void threadWorker(size_t shard, WorkerStatistic & statistic) { Block block; DictionaryKeysArenaHolder arena_holder_; auto & shard_queue = *shards_queues[shard]; - while (shard_queue.pop(block)) + while (true) { + if (!shard_queue.tryPop(block, /* milliseconds= */ 100)) + { + /// Check if we need to stop + if (stop_all_workers || shard_queue.isFinished()) + break; + /// Timeout expired, but the queue is not finished yet, try again + continue; + } + Stopwatch watch; dictionary.blockToAttributes(block, arena_holder_, shard); UInt64 elapsed_ms = watch.elapsedMilliseconds(); - if (elapsed_ms > 1'000) - LOG_TRACE(dictionary.log, "Block processing for shard #{} is slow {}ms (rows {}).", shard, elapsed_ms, block.rows()); - } - if (!shard_queue.isFinished()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not pull non finished shards queue #{}", 
shard); + statistic.total_elapsed_ms += elapsed_ms; + statistic.total_blocks += 1; + statistic.total_rows += block.rows(); + + if (elapsed_ms > 1'000) + LOG_TRACE(dictionary.log, "Block processing for shard #{} is slow {}ms (rows {})", shard, elapsed_ms, block.rows()); + } } /// Split block to shards smaller block, using 'selector'. diff --git a/src/Dictionaries/registerHashedDictionary.cpp b/src/Dictionaries/registerHashedDictionary.cpp index 6b980e2d534..5fc4f5d5cb6 100644 --- a/src/Dictionaries/registerHashedDictionary.cpp +++ b/src/Dictionaries/registerHashedDictionary.cpp @@ -77,6 +77,7 @@ void registerDictionaryHashed(DictionaryFactory & factory) require_nonempty, dict_lifetime, use_async_executor, + std::chrono::seconds(settings.max_execution_time.totalSeconds()), }; if (source_ptr->hasUpdateField() && shards > 1) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index de7a71e8dc1..d25add625e8 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -23,7 +23,6 @@ namespace ErrorCodes extern const int CANNOT_OPEN_FILE; extern const int FILE_DOESNT_EXIST; extern const int BAD_FILE_TYPE; - extern const int FILE_ALREADY_EXISTS; extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; extern const int LOGICAL_ERROR; } @@ -593,14 +592,8 @@ void DiskObjectStorageTransaction::moveDirectory(const std::string & from_path, void DiskObjectStorageTransaction::moveFile(const String & from_path, const String & to_path) { operations_to_execute.emplace_back( - std::make_unique(object_storage, metadata_storage, [from_path, to_path, this](MetadataTransactionPtr tx) + std::make_unique(object_storage, metadata_storage, [from_path, to_path](MetadataTransactionPtr tx) { - if (metadata_storage.exists(to_path)) - throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "File already exists: {}", to_path); - - if (!metadata_storage.exists(from_path)) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist, cannot move", from_path); - tx->moveFile(from_path, to_path); })); } diff --git a/src/Functions/DateTimeTransforms.cpp b/src/Functions/DateTimeTransforms.cpp index 7ec13be9d6d..006d1e94ccd 100644 --- a/src/Functions/DateTimeTransforms.cpp +++ b/src/Functions/DateTimeTransforms.cpp @@ -10,16 +10,17 @@ namespace ErrorCodes void throwDateIsNotSupported(const char * name) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type Date of argument for function {}", name); -} - -void throwDateTimeIsNotSupported(const char * name) -{ - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type DateTime of argument for function {}", name); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument of type Date for function {}", name); } void throwDate32IsNotSupported(const char * name) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type Date32 of argument for function {}", name); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument of type Date32 for function {}", name); } + +void throwDateTimeIsNotSupported(const char * name) +{ + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument of type DateTime for function {}", name); +} + } diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 9eda76f5d20..70b2a7a83b4 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -6,6 +6,7 @@ 
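The HashedDictionaryParallelLoader changes earlier in this hunk replace blocking queue operations with bounded, timed tryPush/tryPop loops: the producer re-checks worker health and the new load_timeout on every failed push, and each worker polls its stop flag on every failed pop. The core queue discipline, as a standalone sketch built on standard primitives (ClickHouse uses its own ConcurrentBoundedQueue):

```cpp
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <mutex>
#include <queue>

template <typename T>
class BoundedQueue
{
public:
    explicit BoundedQueue(std::size_t capacity_) : capacity(capacity_) {}

    /// Returns false on timeout so the caller can re-check worker health
    /// and the global loading deadline instead of blocking forever.
    bool tryPush(T value, std::chrono::milliseconds timeout)
    {
        std::unique_lock lock(mutex);
        if (!not_full.wait_for(lock, timeout, [&] { return queue.size() < capacity; }))
            return false;
        queue.push(std::move(value));
        not_empty.notify_one();
        return true;
    }

    /// Returns false on timeout so a worker can poll its stop flag periodically.
    bool tryPop(T & value, std::chrono::milliseconds timeout)
    {
        std::unique_lock lock(mutex);
        if (!not_empty.wait_for(lock, timeout, [&] { return !queue.empty(); }))
            return false;
        value = std::move(queue.front());
        queue.pop();
        not_full.notify_one();
        return true;
    }

private:
    const std::size_t capacity;
    std::mutex mutex;
    std::condition_variable not_full, not_empty;
    std::queue<T> queue;
};
```

The 100 ms granularity used in the loader bounds how stale those health and timeout checks can get without resorting to busy-waiting.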
#include #include #include +#include "base/Decimal.h" #include #include #include @@ -54,8 +55,8 @@ constexpr time_t MAX_DATE_TIMESTAMP = 5662310399; // 2149-06-06 23:59:59 U constexpr time_t MAX_DATETIME_DAY_NUM = 49710; // 2106-02-07 [[noreturn]] void throwDateIsNotSupported(const char * name); -[[noreturn]] void throwDateTimeIsNotSupported(const char * name); [[noreturn]] void throwDate32IsNotSupported(const char * name); +[[noreturn]] void throwDateTimeIsNotSupported(const char * name); /// This factor transformation will say that the function is monotone everywhere. struct ZeroTransform @@ -481,7 +482,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { @@ -516,7 +517,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { @@ -559,7 +560,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { @@ -602,7 +603,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) { @@ -623,7 +624,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) { @@ -644,7 +645,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) { @@ -777,7 +778,7 @@ struct ToTimeImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -802,7 +803,7 @@ struct ToStartOfMinuteImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -849,7 +850,7 @@ struct ToStartOfSecondImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -897,7 +898,7 @@ struct ToStartOfMillisecondImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -941,7 +942,7 @@ struct ToStartOfMicrosecondImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -979,7 +980,7 @@ 
struct ToStartOfNanosecondImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -1004,7 +1005,7 @@ struct ToStartOfFiveMinutesImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -1036,7 +1037,7 @@ struct ToStartOfTenMinutesImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -1068,7 +1069,7 @@ struct ToStartOfFifteenMinutesImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -1103,7 +1104,7 @@ struct TimeSlotImpl static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) @@ -1142,7 +1143,7 @@ struct ToStartOfHourImpl static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) @@ -1429,7 +1430,7 @@ struct ToHourImpl } static UInt8 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt8 execute(UInt16, const DateLUTImpl &) { @@ -1456,7 +1457,7 @@ struct TimezoneOffsetImpl static time_t execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static time_t execute(UInt16, const DateLUTImpl &) @@ -1482,7 +1483,7 @@ struct ToMinuteImpl } static UInt8 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt8 execute(UInt16, const DateLUTImpl &) { @@ -1507,7 +1508,7 @@ struct ToSecondImpl } static UInt8 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt8 execute(UInt16, const DateLUTImpl &) { @@ -1518,6 +1519,32 @@ struct ToSecondImpl using FactorTransform = ToStartOfMinuteImpl; }; +struct ToMillisecondImpl +{ + static constexpr auto name = "toMillisecond"; + + static UInt16 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl & time_zone) + { + return time_zone.toMillisecond(datetime64, scale_multiplier); + } + + static UInt16 execute(UInt32, const DateLUTImpl &) + { + return 0; + } + static UInt16 execute(Int32, const DateLUTImpl &) + { + throwDate32IsNotSupported(name); + } + static UInt16 execute(UInt16, const DateLUTImpl &) + { + throwDateIsNotSupported(name); + } + static constexpr bool hasPreimage() { return false; } + + using FactorTransform = ZeroTransform; +}; + struct ToISOYearImpl { static constexpr auto name = "toISOYear"; diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index de922747ccd..979c589c64b 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -100,7 +100,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_arguments{ - {"value", &isStringOrFixedString, nullptr, "String or FixedString"} + {"value", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} }; 
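The descriptor hunks throughout this diff replace std::function validators with plain function pointers, so each call site now spells static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString) and similar. The cast is what makes the change work: the is* helpers form overload sets, and while a std::function could be built from a lambda wrapping any of them, a raw function pointer needs one unambiguous address, which static_cast to the pointer type selects. A minimal, self-contained sketch of that mechanism (the types and helpers below are illustrative stand-ins, not ClickHouse's):

#include <iostream>

struct IDataType { virtual ~IDataType() = default; };
struct DataTypeString : IDataType {};

// Hypothetical overload set mirroring helpers like isString/isStringOrFixedString.
bool isString(const IDataType & type) { return dynamic_cast<const DataTypeString *>(&type) != nullptr; }
bool isString(const IDataType * type) { return type && isString(*type); }

struct FunctionArgumentDescriptor
{
    using TypeValidator = bool (*)(const IDataType &);
    const char * argument_name;
    TypeValidator type_validator_func; // raw pointer: no std::function overhead, but needs an unambiguous address
};

int main()
{
    // static_cast picks the (const IDataType &) overload out of the set; a bare
    // &isString would be ambiguous when initializing the pointer member.
    FunctionArgumentDescriptor arg{"value", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString)};
    DataTypeString s;
    std::cout << arg.type_validator_func(s) << '\n'; // prints 1
}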
validateFunctionArgumentTypes(*this, arguments, mandatory_arguments); diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index 5619ebdae49..9f44d3e95c2 100644 --- a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -108,8 +108,10 @@ struct FunctionArgumentDescriptor { const char * argument_name; - std::function<bool (const IDataType &)> type_validator_func; - std::function<bool (const IColumn &)> column_validator_func; + using TypeValidator = bool (*)(const IDataType &); + TypeValidator type_validator_func; + using ColumnValidator = bool (*)(const IColumn &); + ColumnValidator column_validator_func; const char * expected_type_description; diff --git a/src/Functions/FunctionStringReplace.h b/src/Functions/FunctionStringReplace.h index 4d723a5632c..aee04a5969a 100644 --- a/src/Functions/FunctionStringReplace.h +++ b/src/Functions/FunctionStringReplace.h @@ -35,9 +35,9 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"haystack", &isStringOrFixedString, nullptr, "String or FixedString"}, - {"pattern", &isString, nullptr, "String"}, - {"replacement", &isString, nullptr, "String"} + {"haystack", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"}, + {"pattern", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}, + {"replacement", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/FunctionTokens.h b/src/Functions/FunctionTokens.h index 5c4e582c637..c80152bc71d 100644 --- a/src/Functions/FunctionTokens.h +++ b/src/Functions/FunctionTokens.h @@ -74,6 +74,8 @@ public: size_t getNumberOfArguments() const override { return Generator::getNumberOfArguments(); } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return Generator::getArgumentsThatAreAlwaysConstant(); } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { Generator::checkArguments(*this, arguments); @@ -184,12 +186,12 @@ static inline void checkArgumentsWithSeparatorAndOptionalMaxSubstrings( const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"separator", &isString, isColumnConst, "const String"}, - {"s", &isString, nullptr, "String"} + {"separator", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), isColumnConst, "const String"}, + {"s", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"} }; FunctionArgumentDescriptors optional_args{ - {"max_substrings", &isNativeInteger, isColumnConst, "const Number"}, + {"max_substrings", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), isColumnConst, "const Number"}, }; validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); @@ -198,11 +200,11 @@ static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"s", &isString, nullptr, "String"}, + {"s", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}, }; FunctionArgumentDescriptors optional_args{ - {"max_substrings", &isNativeInteger, isColumnConst, "const Number"}, + {"max_substrings", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), isColumnConst, "const Number"}, }; validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); diff --git a/src/Functions/FunctionUnixTimestamp64.h index d74237afd77..53421a565cb 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++
b/src/Functions/FunctionUnixTimestamp64.h @@ -45,7 +45,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"value", &isDateTime64, nullptr, "DateTime64"} + {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 4792c997f51..a03f0b602b9 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -154,21 +154,21 @@ private: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { auto optional_args = FunctionArgumentDescriptors{ - {"IV", &isStringOrFixedString, nullptr, "Initialization vector binary string"}, + {"IV", static_cast(&isStringOrFixedString), nullptr, "Initialization vector binary string"}, }; if constexpr (compatibility_mode == OpenSSLDetails::CompatibilityMode::OpenSSL) { optional_args.emplace_back(FunctionArgumentDescriptor{ - "AAD", &isStringOrFixedString, nullptr, "Additional authenticated data binary string for GCM mode" + "AAD", static_cast(&isStringOrFixedString), nullptr, "Additional authenticated data binary string for GCM mode" }); } validateFunctionArgumentTypes(*this, arguments, FunctionArgumentDescriptors{ - {"mode", &isStringOrFixedString, isColumnConst, "encryption mode string"}, - {"input", &isStringOrFixedString, {}, "plaintext"}, - {"key", &isStringOrFixedString, {}, "encryption key binary string"}, + {"mode", static_cast(&isStringOrFixedString), isColumnConst, "encryption mode string"}, + {"input", static_cast(&isStringOrFixedString), {}, "plaintext"}, + {"key", static_cast(&isStringOrFixedString), {}, "encryption key binary string"}, }, optional_args ); @@ -425,21 +425,21 @@ private: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { auto optional_args = FunctionArgumentDescriptors{ - {"IV", &isStringOrFixedString, nullptr, "Initialization vector binary string"}, + {"IV", static_cast(&isStringOrFixedString), nullptr, "Initialization vector binary string"}, }; if constexpr (compatibility_mode == OpenSSLDetails::CompatibilityMode::OpenSSL) { optional_args.emplace_back(FunctionArgumentDescriptor{ - "AAD", &isStringOrFixedString, nullptr, "Additional authenticated data binary string for GCM mode" + "AAD", static_cast(&isStringOrFixedString), nullptr, "Additional authenticated data binary string for GCM mode" }); } validateFunctionArgumentTypes(*this, arguments, FunctionArgumentDescriptors{ - {"mode", &isStringOrFixedString, isColumnConst, "decryption mode string"}, - {"input", &isStringOrFixedString, {}, "ciphertext"}, - {"key", &isStringOrFixedString, {}, "decryption key binary string"}, + {"mode", static_cast(&isStringOrFixedString), isColumnConst, "decryption mode string"}, + {"input", static_cast(&isStringOrFixedString), {}, "ciphertext"}, + {"key", static_cast(&isStringOrFixedString), {}, "decryption key binary string"}, }, optional_args ); diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 19647e2f086..1522e76893e 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2129,12 +2129,12 @@ public: if constexpr (to_decimal) { - mandatory_args.push_back({"scale", &isNativeInteger, &isColumnConst, "const Integer"}); + mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); } if (!to_decimal && isDateTime64(arguments)) { - 
mandatory_args.push_back({"scale", &isNativeInteger, &isColumnConst, "const Integer"}); + mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); } // toString(DateTime or DateTime64, [timezone: String]) @@ -2150,7 +2150,7 @@ public: // toDateTime64(value, scale : Integer[, timezone: String]) || std::is_same_v) { - optional_args.push_back({"timezone", &isString, nullptr, "String"}); + optional_args.push_back({"timezone", static_cast(&isString), nullptr, "String"}); } validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -2498,11 +2498,11 @@ public: if (isDateTime64(arguments)) { validateFunctionArgumentTypes(*this, arguments, - FunctionArgumentDescriptors{{"string", &isStringOrFixedString, nullptr, "String or FixedString"}}, + FunctionArgumentDescriptors{{"string", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}}, // optional FunctionArgumentDescriptors{ - {"precision", &isUInt8, isColumnConst, "const UInt8"}, - {"timezone", &isStringOrFixedString, isColumnConst, "const String or FixedString"}, + {"precision", static_cast(&isUInt8), isColumnConst, "const UInt8"}, + {"timezone", static_cast(&isStringOrFixedString), isColumnConst, "const String or FixedString"}, }); UInt64 scale = to_datetime64 ? DataTypeDateTime64::default_scale : 0; diff --git a/src/Functions/JSONArrayLength.cpp b/src/Functions/JSONArrayLength.cpp index a82c50360f9..84e87061398 100644 --- a/src/Functions/JSONArrayLength.cpp +++ b/src/Functions/JSONArrayLength.cpp @@ -45,7 +45,7 @@ namespace DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { auto args = FunctionArgumentDescriptors{ - {"json", &isString, nullptr, "String"}, + {"json", static_cast(&isString), nullptr, "String"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp index 25c6c9ef40b..a0c78c5c1a2 100644 --- a/src/Functions/URL/URLHierarchy.cpp +++ b/src/Functions/URL/URLHierarchy.cpp @@ -24,10 +24,12 @@ public: static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 1; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"URL", &isString, nullptr, "String"}, + {"URL", static_cast(&isString), nullptr, "String"}, }; validateFunctionArgumentTypes(func, arguments, mandatory_args); diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp index 9a60d4cf989..8f546ef6a56 100644 --- a/src/Functions/URL/URLPathHierarchy.cpp +++ b/src/Functions/URL/URLPathHierarchy.cpp @@ -22,10 +22,12 @@ public: static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 1; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"URL", &isString, nullptr, "String"}, + {"URL", static_cast(&isString), nullptr, "String"}, }; validateFunctionArgumentTypes(func, arguments, mandatory_args); diff --git a/src/Functions/URL/extractURLParameterNames.cpp b/src/Functions/URL/extractURLParameterNames.cpp index 08da148b43e..16ace36d39b 100644 --- a/src/Functions/URL/extractURLParameterNames.cpp +++ b/src/Functions/URL/extractURLParameterNames.cpp @@ -22,10 
+22,12 @@ public: static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 1; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"URL", &isString, nullptr, "String"}, + {"URL", static_cast(&isString), nullptr, "String"}, }; validateFunctionArgumentTypes(func, arguments, mandatory_args); diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp index 939622dd9d1..43079834872 100644 --- a/src/Functions/URL/extractURLParameters.cpp +++ b/src/Functions/URL/extractURLParameters.cpp @@ -23,10 +23,12 @@ public: static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 1; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"URL", &isString, nullptr, "String"}, + {"URL", static_cast(&isString), nullptr, "String"}, }; validateFunctionArgumentTypes(func, arguments, mandatory_args); diff --git a/src/Functions/alphaTokens.cpp b/src/Functions/alphaTokens.cpp index 35cacdbdbb8..35f434e7498 100644 --- a/src/Functions/alphaTokens.cpp +++ b/src/Functions/alphaTokens.cpp @@ -32,6 +32,8 @@ public: static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index c2a4fee4845..9cb74a7aa62 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -84,8 +84,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"array_1", &isArray, nullptr, "Array"}, - {"array_2", &isArray, nullptr, "Array"}, + {"array_1", static_cast(&isArray), nullptr, "Array"}, + {"array_2", static_cast(&isArray), nullptr, "Array"}, }; validateFunctionArgumentTypes(*this, arguments, args); return std::make_shared>(); diff --git a/src/Functions/array/arrayRandomSample.cpp b/src/Functions/array/arrayRandomSample.cpp index 40344efb077..b08a73b93f3 100644 --- a/src/Functions/array/arrayRandomSample.cpp +++ b/src/Functions/array/arrayRandomSample.cpp @@ -36,8 +36,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"array", &isArray, nullptr, "Array"}, - {"samples", &isUInt, isColumnConst, "const UInt*"}, + {"array", static_cast(&isArray), nullptr, "Array"}, + {"samples", static_cast(&isUInt), isColumnConst, "const UInt*"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/array/arrayShingles.cpp b/src/Functions/array/arrayShingles.cpp index ade1cb862f7..8932482c69c 100644 --- a/src/Functions/array/arrayShingles.cpp +++ b/src/Functions/array/arrayShingles.cpp @@ -28,8 +28,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"array", &isArray, nullptr, "Array"}, - {"length", &isInteger, nullptr, "Integer"} + {"array", static_cast(&isArray), nullptr, "Array"}, + 
{"length", static_cast(&isInteger), nullptr, "Integer"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/arrayStringConcat.cpp b/src/Functions/arrayStringConcat.cpp index 0194cc4871a..c186c0ca7e6 100644 --- a/src/Functions/arrayStringConcat.cpp +++ b/src/Functions/arrayStringConcat.cpp @@ -151,12 +151,12 @@ public: { FunctionArgumentDescriptors mandatory_args { - {"arr", &isArray, nullptr, "Array"}, + {"arr", static_cast(&isArray), nullptr, "Array"}, }; FunctionArgumentDescriptors optional_args { - {"separator", &isString, isColumnConst, "const String"}, + {"separator", static_cast(&isString), isColumnConst, "const String"}, }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 26eaf4f5613..970e6fd6f75 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -210,10 +210,10 @@ private: FunctionArgumentDescriptors optional_args; if constexpr (IsDataTypeDecimal) - mandatory_args.push_back({"scale", &isNativeInteger, &isColumnConst, "const Integer"}); + mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); if (std::is_same_v || std::is_same_v) - optional_args.push_back({"timezone", &isString, isColumnConst, "const String"}); + optional_args.push_back({"timezone", static_cast(&isString), isColumnConst, "const String"}); optional_args.push_back({"default_value", nullptr, nullptr, nullptr}); diff --git a/src/Functions/countMatches.h b/src/Functions/countMatches.h index e9880e6e93f..fbbb9d017ee 100644 --- a/src/Functions/countMatches.h +++ b/src/Functions/countMatches.h @@ -35,8 +35,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"haystack", &isStringOrFixedString, nullptr, "String or FixedString"}, - {"pattern", &isString, isColumnConst, "constant String"} + {"haystack", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}, + {"pattern", static_cast(&isString), isColumnConst, "constant String"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/extractAll.cpp b/src/Functions/extractAll.cpp index ad49f32f769..f0c18bf79b9 100644 --- a/src/Functions/extractAll.cpp +++ b/src/Functions/extractAll.cpp @@ -50,11 +50,13 @@ public: static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 2; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { FunctionArgumentDescriptors mandatory_args{ - {"haystack", &isString, nullptr, "String"}, - {"pattern", &isString, isColumnConst, "const String"} + {"haystack", static_cast(&isString), nullptr, "String"}, + {"pattern", static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(func, arguments, mandatory_args); diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index c64c9d6ccef..ac12cad1698 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -71,8 +71,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"haystack", &isStringOrFixedString, nullptr, "const String or const FixedString"}, - {"needle", &isStringOrFixedString, isColumnConst, "const String or const FixedString"}, + 
{"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, + {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp index e22938f8565..f62352af0bd 100644 --- a/src/Functions/extractGroups.cpp +++ b/src/Functions/extractGroups.cpp @@ -45,8 +45,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"haystack", &isStringOrFixedString, nullptr, "const String or const FixedString"}, - {"needle", &isStringOrFixedString, isColumnConst, "const String or const FixedString"}, + {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, + {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 2f6bc6f9903..92403d2e88e 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -54,7 +54,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"query", &isString, nullptr, "String"} + {"query", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/fromDaysSinceYearZero.cpp b/src/Functions/fromDaysSinceYearZero.cpp index a21d0cc25bf..b98c587d172 100644 --- a/src/Functions/fromDaysSinceYearZero.cpp +++ b/src/Functions/fromDaysSinceYearZero.cpp @@ -52,7 +52,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - FunctionArgumentDescriptors args{{"days", &isNativeInteger, nullptr, "Integer"}}; + FunctionArgumentDescriptors args{{"days", static_cast(&isNativeInteger), nullptr, "Integer"}}; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/identity.cpp b/src/Functions/identity.cpp index 7174f1fd318..43cca76c801 100644 --- a/src/Functions/identity.cpp +++ b/src/Functions/identity.cpp @@ -9,4 +9,9 @@ REGISTER_FUNCTION(Identity) factory.registerFunction(); } +REGISTER_FUNCTION(ScalarSubqueryResult) +{ + factory.registerFunction(); +} + } diff --git a/src/Functions/identity.h b/src/Functions/identity.h index efee95841f5..c753625caa7 100644 --- a/src/Functions/identity.h +++ b/src/Functions/identity.h @@ -6,11 +6,12 @@ namespace DB { -class FunctionIdentity : public IFunction +template +class FunctionIdentityBase : public IFunction { public: - static constexpr auto name = "identity"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static constexpr auto name = Name::name; + static FunctionPtr create(ContextPtr) { return std::make_shared>(); } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } @@ -28,4 +29,17 @@ public: } }; +struct IdentityName +{ + static constexpr auto name = "identity"; +}; + +struct ScalarSubqueryResultName +{ + static constexpr auto name = "__scalarSubqueryResult"; +}; + +using FunctionIdentity = FunctionIdentityBase; +using FunctionScalarSubqueryResult = FunctionIdentityBase; + } diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index 987cf4eb1a9..c7f3c195578 100644 --- a/src/Functions/makeDate.cpp +++ 
b/src/Functions/makeDate.cpp @@ -82,17 +82,17 @@ public: if (is_year_month_variant) { FunctionArgumentDescriptors args{ - {mandatory_argument_names_year_month_day[0], &isNumber, nullptr, "Number"}, - {mandatory_argument_names_year_month_day[1], &isNumber, nullptr, "Number"}, - {mandatory_argument_names_year_month_day[2], &isNumber, nullptr, "Number"} + {mandatory_argument_names_year_month_day[0], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names_year_month_day[1], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names_year_month_day[2], static_cast(&isNumber), nullptr, "Number"} }; validateFunctionArgumentTypes(*this, arguments, args); } else { FunctionArgumentDescriptors args{ - {mandatory_argument_names_year_dayofyear[0], &isNumber, nullptr, "Number"}, - {mandatory_argument_names_year_dayofyear[1], &isNumber, nullptr, "Number"} + {mandatory_argument_names_year_dayofyear[0], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names_year_dayofyear[1], static_cast(&isNumber), nullptr, "Number"} }; validateFunctionArgumentTypes(*this, arguments, args); } @@ -189,7 +189,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"} }; validateFunctionArgumentTypes(*this, arguments, args); @@ -344,16 +344,16 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[1], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[2], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[3], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[4], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[5], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[1], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[2], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[3], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[4], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[5], static_cast(&isNumber), nullptr, "Number"} }; FunctionArgumentDescriptors optional_args{ - {optional_argument_names[0], &isString, isColumnConst, "const String"} + {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -425,18 +425,18 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[1], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[2], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[3], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[4], &isNumber, nullptr, "Number"}, - {mandatory_argument_names[5], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[1], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[2], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[3], 
static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[4], static_cast(&isNumber), nullptr, "Number"}, + {mandatory_argument_names[5], static_cast(&isNumber), nullptr, "Number"} }; FunctionArgumentDescriptors optional_args{ - {optional_argument_names[0], &isNumber, nullptr, "const Number"}, - {optional_argument_names[1], &isNumber, isColumnConst, "const Number"}, - {optional_argument_names[2], &isString, isColumnConst, "const String"} + {optional_argument_names[0], static_cast(&isNumber), nullptr, "const Number"}, + {optional_argument_names[1], static_cast(&isNumber), isColumnConst, "const Number"}, + {optional_argument_names[2], static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -564,11 +564,11 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"} }; FunctionArgumentDescriptors optional_args{ - {optional_argument_names[0], &isString, isColumnConst, "const String"} + {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -643,12 +643,12 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {mandatory_argument_names[0], &isNumber, nullptr, "Number"} + {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"} }; FunctionArgumentDescriptors optional_args{ - {optional_argument_names[0], &isNumber, isColumnConst, "const Number"}, - {optional_argument_names[0], &isString, isColumnConst, "const String"} + {optional_argument_names[0], static_cast(&isNumber), isColumnConst, "const Number"}, + {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 862ce9909e4..81304f3afbd 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -3,12 +3,20 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -20,7 +28,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } namespace @@ -149,6 +157,10 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const override { + /// Fast path when data is empty + if (input_rows_count == 0) + return result_type->createColumn(); + ColumnsWithTypeAndName arguments = args; executeShortCircuitArguments(arguments); /** We will gather values from columns in branches to result column, @@ -249,64 +261,73 @@ public: } const WhichDataType which(removeNullable(result_type)); - bool execute_multiif_columnar - = allow_execute_multiif_columnar && !contains_short && (which.isInt() || which.isUInt() || which.isFloat()); + bool execute_multiif_columnar = allow_execute_multiif_columnar && !contains_short + && instructions.size() <= std::numeric_limits::max() + && (which.isInt() || 
which.isUInt() || which.isFloat() || which.isDecimal() || which.isDateOrDate32OrDateTimeOrDateTime64() + || which.isEnum() || which.isIPv4() || which.isIPv6()); size_t rows = input_rows_count; if (!execute_multiif_columnar) { MutableColumnPtr res = return_type->createColumn(); + res->reserve(rows); executeInstructions(instructions, rows, res); return std::move(res); } -#define EXECUTE_INSTRUCTIONS_COLUMNAR(TYPE, INDEX) \ +#define EXECUTE_INSTRUCTIONS_COLUMNAR(TYPE, FIELD, INDEX) \ if (which.is##TYPE()) \ { \ - MutableColumnPtr res = ColumnVector::create(rows); \ - MutableColumnPtr null_map = result_type->isNullable() ? ColumnUInt8::create(rows) : nullptr; \ - executeInstructionsColumnar(instructions, rows, res, null_map, result_type->isNullable()); \ - if (!result_type->isNullable()) \ - return std::move(res); \ + MutableColumnPtr res = result_type->createColumn(); \ + if (result_type->isNullable()) \ + { \ + auto & res_nullable = assert_cast(*res); \ + auto & res_data = assert_cast &>(res_nullable.getNestedColumn()).getData(); \ + auto & res_null_map = res_nullable.getNullMapData(); \ + executeInstructionsColumnar(instructions, rows, res_data, &res_null_map); \ + } \ else \ - return ColumnNullable::create(std::move(res), std::move(null_map)); \ + { \ + auto & res_data = assert_cast &>(*res).getData(); \ + executeInstructionsColumnar(instructions, rows, res_data, nullptr); \ + } \ + return std::move(res); \ } #define ENUMERATE_NUMERIC_TYPES(M, INDEX) \ - M(UInt8, INDEX) \ - M(UInt16, INDEX) \ - M(UInt32, INDEX) \ - M(UInt64, INDEX) \ - M(Int8, INDEX) \ - M(Int16, INDEX) \ - M(Int32, INDEX) \ - M(Int64, INDEX) \ - M(UInt128, INDEX) \ - M(UInt256, INDEX) \ - M(Int128, INDEX) \ - M(Int256, INDEX) \ - M(Float32, INDEX) \ - M(Float64, INDEX) \ + M(UInt8, UInt8, INDEX) \ + M(UInt16, UInt16, INDEX) \ + M(UInt32, UInt32, INDEX) \ + M(UInt64, UInt64, INDEX) \ + M(Int8, Int8, INDEX) \ + M(Int16, Int16, INDEX) \ + M(Int32, Int32, INDEX) \ + M(Int64, Int64, INDEX) \ + M(Float32, Float32, INDEX) \ + M(Float64, Float64, INDEX) \ + M(UInt128, UInt128, INDEX) \ + M(UInt256, UInt256, INDEX) \ + M(Int128, Int128, INDEX) \ + M(Int256, Int256, INDEX) \ + M(Decimal32, Decimal32, INDEX) \ + M(Decimal64, Decimal64, INDEX) \ + M(Decimal128, Decimal128, INDEX) \ + M(Decimal256, Decimal256, INDEX) \ + M(Date, UInt16, INDEX) \ + M(Date32, Int32, INDEX) \ + M(DateTime, UInt32, INDEX) \ + M(DateTime64, DateTime64, INDEX) \ + M(Enum8, Int8, INDEX) \ + M(Enum16, Int16, INDEX) \ + M(IPv4, IPv4, INDEX) \ + M(IPv6, IPv6, INDEX) \ throw Exception( \ ErrorCodes::NOT_IMPLEMENTED, "Columnar execution of function {} not implemented for type {}", getName(), result_type->getName()); - size_t num_instructions = instructions.size(); - if (num_instructions <= std::numeric_limits::max()) - { - ENUMERATE_NUMERIC_TYPES(EXECUTE_INSTRUCTIONS_COLUMNAR, Int16) - } - else if (num_instructions <= std::numeric_limits::max()) - { - ENUMERATE_NUMERIC_TYPES(EXECUTE_INSTRUCTIONS_COLUMNAR, Int32) - } - else if (num_instructions <= std::numeric_limits::max()) - { - ENUMERATE_NUMERIC_TYPES(EXECUTE_INSTRUCTIONS_COLUMNAR, Int64) - } - else - throw Exception( - ErrorCodes::LOGICAL_ERROR, "Instruction size({}) of function {} is out of range", getName(), result_type->getName()); + ENUMERATE_NUMERIC_TYPES(EXECUTE_INSTRUCTIONS_COLUMNAR, UInt8) } +#undef ENUMERATE_NUMERIC_TYPES +#undef EXECUTE_INSTRUCTIONS_COLUMNAR private: @@ -348,11 +369,11 @@ private: /// We should read source from which instruction on each row? 
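The columnar multiIf path answers that question in two passes: first compute, per row, the index of the instruction that should supply the value (the "inserts" array, filled by walking branches in reverse so earlier conditions win), then gather through flat data pointers so the hot loop stays branch-free. A simplified, self-contained sketch of the same two-pass gather (plain std:: containers standing in for ClickHouse columns; not the actual kernel):

#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    // Branches of multiIf(cond0, source0, cond1, source1, else source2).
    std::vector<std::vector<uint8_t>> conds = {{1, 0, 0, 0}, {0, 0, 1, 0}};
    std::vector<std::vector<int64_t>> sources = {{10, 10, 10, 10}, {20, 20, 20, 20}, {30, 30, 30, 30}};
    size_t rows = 4;

    // Pass 1: per-row instruction index, defaulting to the else branch (index 2).
    // Iterating branches in reverse, like calculateInserts, lets earlier
    // conditions overwrite later ones.
    std::vector<uint16_t> inserts(rows, 2);
    for (size_t b = conds.size(); b-- > 0;)
        for (size_t row = 0; row < rows; ++row)
            if (conds[b][row])
                inserts[row] = static_cast<uint16_t>(b);

    // Pass 2: gather through raw pointers, mirroring executeInstructionsColumnar.
    std::vector<const int64_t *> data_cols;
    for (const auto & s : sources)
        data_cols.push_back(s.data());
    std::vector<int64_t> res(rows);
    for (size_t row = 0; row < rows; ++row)
        res[row] = data_cols[inserts[row]][row];

    for (auto v : res)
        std::cout << v << ' '; // prints: 10 30 20 30
}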
template - static void calculateInserts(std::vector & instructions, size_t rows, PaddedPODArray & inserts) + static NO_INLINE void calculateInserts(const std::vector & instructions, size_t rows, PaddedPODArray & inserts) { - for (S i = static_cast(instructions.size() - 1); i >= 0; --i) + for (S i = instructions.size() - 1; i != static_cast(-1); --i) { - auto & instruction = instructions[i]; + const auto & instruction = instructions[i]; if (instruction.condition_always_true) { for (size_t row_i = 0; row_i < rows; ++row_i) @@ -388,60 +409,62 @@ private: } } - template - static void executeInstructionsColumnar(std::vector & instructions, size_t rows, const MutableColumnPtr & res, const MutableColumnPtr & null_map, bool nullable) + template + static NO_INLINE void executeInstructionsColumnar( + const std::vector & instructions, + size_t rows, + PaddedPODArray & res_data, + PaddedPODArray * res_null_map = nullptr) { PaddedPODArray inserts(rows, static_cast(instructions.size())); calculateInserts(instructions, rows, inserts); - PaddedPODArray & res_data = assert_cast &>(*res).getData(); - if (!nullable) + res_data.resize_exact(rows); + if constexpr (nullable_result) { - for (size_t row_i = 0; row_i < rows; ++row_i) + if (!res_null_map) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid result null_map while result type is nullable"); + + res_null_map->resize_exact(rows); + } + + std::vector data_cols(instructions.size(), nullptr); + std::vector null_map_cols(instructions.size(), nullptr); + for (size_t i = 0; i < instructions.size(); ++i) + { + const auto & instruction = instructions[i]; + const IColumn * non_const_col = instructions[i].source_is_constant + ? &assert_cast(*instruction.source).getDataColumn() + : instruction.source.get(); + const ColumnNullable * nullable_col = checkAndGetColumn(non_const_col); + data_cols[i] = nullable_col ? assert_cast &>(nullable_col->getNestedColumn()).getData().data() + : assert_cast &>(*non_const_col).getData().data(); + null_map_cols[i] = nullable_col ? 
assert_cast(nullable_col->getNullMapColumn()).getData().data() : nullptr; + } + + std::unique_ptr> shared_null_map; + if constexpr (nullable_result) + { + for (auto & col : null_map_cols) { - auto & instruction = instructions[inserts[row_i]]; - auto ref = instruction.source->getDataAt(row_i); - res_data[row_i] = *reinterpret_cast(ref.data); + if (!col) + { + if (!shared_null_map) + shared_null_map = std::make_unique>(rows, 0); + + col = shared_null_map->data(); + } } } - else + + for (size_t row_i = 0; row_i < rows; ++row_i) { - PaddedPODArray & null_map_data = assert_cast(*null_map).getData(); - std::vector data_cols(instructions.size()); - std::vector null_map_cols(instructions.size()); - ColumnPtr shared_null_map_col = nullptr; - for (size_t i = 0; i < instructions.size(); ++i) - { - if (instructions[i].source->isNullable()) - { - const ColumnNullable * nullable_col; - if (!instructions[i].source_is_constant) - nullable_col = assert_cast(instructions[i].source.get()); - else - { - const ColumnPtr data_column = assert_cast(*instructions[i].source).getDataColumnPtr(); - nullable_col = assert_cast(data_column.get()); - } - null_map_cols[i] = assert_cast(*nullable_col->getNullMapColumnPtr()).getData().data(); - data_cols[i] = assert_cast &>(*nullable_col->getNestedColumnPtr()).getData().data(); - } - else - { - if (!shared_null_map_col) - { - shared_null_map_col = ColumnUInt8::create(rows, 0); - } - null_map_cols[i] = assert_cast(*shared_null_map_col).getData().data(); - data_cols[i] = assert_cast &>(*instructions[i].source).getData().data(); - } - } - for (size_t row_i = 0; row_i < rows; ++row_i) - { - auto & instruction = instructions[inserts[row_i]]; - size_t index = instruction.source_is_constant ? 0 : row_i; - res_data[row_i] = *(data_cols[inserts[row_i]] + index); - null_map_data[row_i] = *(null_map_cols[inserts[row_i]] + index); - } + S insert = inserts[row_i]; + const auto & instruction = instructions[insert]; + size_t index = instruction.source_is_constant ? 
0 : row_i; + res_data[row_i] = *(data_cols[insert] + index); + if constexpr (nullable_result) + (*res_null_map)[row_i] = *(null_map_cols[insert] + index); } } diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 860603dc503..18882177c90 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -489,12 +489,12 @@ namespace DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {"time", &isString, nullptr, "String"}, - {"format", &isString, nullptr, "String"} + {"time", static_cast(&isString), nullptr, "String"}, + {"format", static_cast(&isString), nullptr, "String"} }; FunctionArgumentDescriptors optional_args{ - {"timezone", &isString, &isColumnConst, "const String"} + {"timezone", static_cast(&isString), &isColumnConst, "const String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/regexpExtract.cpp b/src/Functions/regexpExtract.cpp index f6bbd2f96f2..cfb42580cb0 100644 --- a/src/Functions/regexpExtract.cpp +++ b/src/Functions/regexpExtract.cpp @@ -47,12 +47,12 @@ public: arguments.size()); FunctionArgumentDescriptors args{ - {"haystack", &isString, nullptr, "String"}, - {"pattern", &isString, isColumnConst, "const String"}, + {"haystack", static_cast(&isString), nullptr, "String"}, + {"pattern", static_cast(&isString), isColumnConst, "const String"}, }; if (arguments.size() == 3) - args.emplace_back(FunctionArgumentDescriptor{"index", &isInteger, nullptr, "Integer"}); + args.emplace_back(FunctionArgumentDescriptor{"index", static_cast(&isInteger), nullptr, "Integer"}); validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index c1b553ac6b3..11a2ca37a3b 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -186,8 +186,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"s", &isString, nullptr, "String"}, - {"n", &isInteger, nullptr, "Integer"}, + {"s", static_cast(&isString), nullptr, "String"}, + {"n", static_cast(&isInteger), nullptr, "Integer"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/seriesDecomposeSTL.cpp b/src/Functions/seriesDecomposeSTL.cpp index fbabc801913..618808b64ed 100644 --- a/src/Functions/seriesDecomposeSTL.cpp +++ b/src/Functions/seriesDecomposeSTL.cpp @@ -42,8 +42,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"time_series", &isArray, nullptr, "Array"}, - {"period", &isNativeUInt, nullptr, "Unsigned Integer"}, + {"time_series", static_cast(&isArray), nullptr, "Array"}, + {"period", static_cast(&isNativeUInt), nullptr, "Unsigned Integer"}, }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/seriesOutliersDetectTukey.cpp b/src/Functions/seriesOutliersDetectTukey.cpp index 5bc8edf3a54..da04d3b78d3 100644 --- a/src/Functions/seriesOutliersDetectTukey.cpp +++ b/src/Functions/seriesOutliersDetectTukey.cpp @@ -45,11 +45,11 @@ public: getName(), arguments.size()); - FunctionArgumentDescriptors mandatory_args{{"time_series", &isArray, nullptr, "Array"}}; + FunctionArgumentDescriptors mandatory_args{{"time_series", static_cast(&isArray), nullptr, "Array"}}; FunctionArgumentDescriptors optional_args{ - {"min_percentile", &isFloat, isColumnConst, 
"Number"}, - {"max_percentile", &isFloat, isColumnConst, "Number"}, - {"k", &isNativeNumber, isColumnConst, "Number"}}; + {"min_percentile", static_cast(&isFloat), isColumnConst, "Number"}, + {"max_percentile", static_cast(&isFloat), isColumnConst, "Number"}, + {"k", static_cast(&isNativeNumber), isColumnConst, "Number"}}; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/seriesPeriodDetectFFT.cpp b/src/Functions/seriesPeriodDetectFFT.cpp index c01f6b7f07b..fbaa2b14e64 100644 --- a/src/Functions/seriesPeriodDetectFFT.cpp +++ b/src/Functions/seriesPeriodDetectFFT.cpp @@ -52,7 +52,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - FunctionArgumentDescriptors args{{"time_series", &isArray, nullptr, "Array"}}; + FunctionArgumentDescriptors args{{"time_series", static_cast(&isArray), nullptr, "Array"}}; validateFunctionArgumentTypes(*this, arguments, args); return std::make_shared(); diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp index 6aafa2cb5cf..f2dd1f1c51d 100644 --- a/src/Functions/snowflake.cpp +++ b/src/Functions/snowflake.cpp @@ -47,7 +47,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"value", &isDateTime, nullptr, "DateTime"} + {"value", static_cast(&isDateTime), nullptr, "DateTime"} }; validateFunctionArgumentTypes(*this, arguments, args); @@ -91,10 +91,10 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {"value", &isInt64, nullptr, "Int64"} + {"value", static_cast(&isInt64), nullptr, "Int64"} }; FunctionArgumentDescriptors optional_args{ - {"time_zone", &isString, nullptr, "String"} + {"time_zone", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -151,7 +151,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"value", &isDateTime64, nullptr, "DateTime64"} + {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; validateFunctionArgumentTypes(*this, arguments, args); @@ -203,10 +203,10 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {"value", &isInt64, nullptr, "Int64"} + {"value", static_cast(&isInt64), nullptr, "Int64"} }; FunctionArgumentDescriptors optional_args{ - {"time_zone", &isString, nullptr, "String"} + {"time_zone", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/space.cpp b/src/Functions/space.cpp index 009bc20e065..03dc0d06719 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -45,7 +45,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"n", &isInteger, nullptr, "Integer"} + {"n", static_cast(&isInteger), nullptr, "Integer"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/splitByChar.cpp b/src/Functions/splitByChar.cpp index d537039dc23..d3d5dc9fe4a 100644 --- a/src/Functions/splitByChar.cpp +++ b/src/Functions/splitByChar.cpp @@ -40,6 +40,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; 
} + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {0, 2}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/splitByNonAlpha.cpp b/src/Functions/splitByNonAlpha.cpp index 467e7b0b5c3..4486a33aa88 100644 --- a/src/Functions/splitByNonAlpha.cpp +++ b/src/Functions/splitByNonAlpha.cpp @@ -42,6 +42,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/splitByRegexp.cpp b/src/Functions/splitByRegexp.cpp index 77328205c01..430089f14ee 100644 --- a/src/Functions/splitByRegexp.cpp +++ b/src/Functions/splitByRegexp.cpp @@ -44,6 +44,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {0, 2}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/splitByString.cpp b/src/Functions/splitByString.cpp index 7d6803b2f27..5c97f9841e7 100644 --- a/src/Functions/splitByString.cpp +++ b/src/Functions/splitByString.cpp @@ -39,6 +39,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {0, 2}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithSeparatorAndOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/splitByWhitespace.cpp b/src/Functions/splitByWhitespace.cpp index 168e429c6f5..cf21a218b15 100644 --- a/src/Functions/splitByWhitespace.cpp +++ b/src/Functions/splitByWhitespace.cpp @@ -30,6 +30,8 @@ public: static bool isVariadic() { return true; } static size_t getNumberOfArguments() { return 0; } + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; } + static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments) { checkArgumentsWithOptionalMaxSubstrings(func, arguments); diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp index cd3875e2607..a052f20d6fa 100644 --- a/src/Functions/sqid.cpp +++ b/src/Functions/sqid.cpp @@ -98,7 +98,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {"sqid", &isString, nullptr, "String"} + {"sqid", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, args); diff --git a/src/Functions/timestamp.cpp b/src/Functions/timestamp.cpp index 48012c1376f..fbca08b0968 100644 --- a/src/Functions/timestamp.cpp +++ b/src/Functions/timestamp.cpp @@ -41,10 +41,10 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {"timestamp", &isStringOrFixedString, nullptr, "String or FixedString"} + {"timestamp", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} }; FunctionArgumentDescriptors optional_args{ - {"time", &isString, nullptr, "String"} + {"time", static_cast(&isString), 
nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); diff --git a/src/Functions/toDecimalString.cpp b/src/Functions/toDecimalString.cpp index cc2de8df0d4..fc621b272de 100644 --- a/src/Functions/toDecimalString.cpp +++ b/src/Functions/toDecimalString.cpp @@ -39,8 +39,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args = { - {"Value", &isNumber, nullptr, "Number"}, - {"precision", &isNativeInteger, &isColumnConst, "const Integer"} + {"Value", static_cast(&isNumber), nullptr, "Number"}, + {"precision", static_cast(&isNativeInteger), &isColumnConst, "const Integer"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, {}); diff --git a/src/Functions/toMillisecond.cpp b/src/Functions/toMillisecond.cpp new file mode 100644 index 00000000000..aaef517c996 --- /dev/null +++ b/src/Functions/toMillisecond.cpp @@ -0,0 +1,33 @@ +#include +#include +#include +#include + +namespace DB +{ + +using FunctionToMillisecond = FunctionDateOrDateTimeToSomething; + +REGISTER_FUNCTION(ToMillisecond) +{ + factory.registerFunction( + + + FunctionDocumentation{ + .description=R"( +Returns the millisecond component (0-999) of a date with time. + )", + .syntax="toMillisecond(value)", + .arguments={{"value", "DateTime or DateTime64"}}, + .returned_value="The millisecond in the minute (0 - 59) of the given date/time", + .examples{ + {"toMillisecond", "SELECT toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3)", "456"}}, + .categories{"Dates and Times"} + } + ); + + /// MySQL compatibility alias. + factory.registerAlias("MILLISECOND", "toMillisecond", FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 7240679abb7..37ef217cb6d 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -64,6 +64,37 @@ std::pair getFunctionArguments(const ActionsDAG::N return { std::move(arguments), all_const }; } +bool isConstantFromScalarSubquery(const ActionsDAG::Node * node) +{ + std::stack stack; + stack.push(node); + while (!stack.empty()) + { + const auto * arg = stack.top(); + stack.pop(); + + if (arg->column && isColumnConst(*arg->column)) + continue; + + while (arg->type == ActionsDAG::ActionType::ALIAS) + arg = arg->children.at(0); + + if (arg->type != ActionsDAG::ActionType::FUNCTION) + return false; + + if (arg->function_base->getName() == "__scalarSubqueryResult") + continue; + + if (arg->children.empty() || !arg->function_base->isSuitableForConstantFolding()) + return false; + + for (const auto * child : arg->children) + stack.push(child); + } + + return true; +} + } void ActionsDAG::Node::toTree(JSONBuilder::JSONMap & map) const @@ -196,6 +227,19 @@ const ActionsDAG::Node & ActionsDAG::addFunction( { auto [arguments, all_const] = getFunctionArguments(children); + auto constant_args = function->getArgumentsThatAreAlwaysConstant(); + for (size_t pos : constant_args) + { + if (pos >= children.size()) + continue; + + if (arguments[pos].column && isColumnConst(*arguments[pos].column)) + continue; + + if (isConstantFromScalarSubquery(children[pos])) + arguments[pos].column = arguments[pos].type->createColumnConstWithDefaultValue(0); + } + auto function_base = function->build(arguments); return addFunctionImpl( function_base, @@ -1318,7 +1362,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( size_t num_result_columns = result.size(); if (mode == 
MatchColumnsMode::Position && num_input_columns != num_result_columns) - throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns doesn't match"); + throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns doesn't match (source: {} and result: {})", num_input_columns, num_result_columns); if (add_casted_columns && mode != MatchColumnsMode::Name) throw Exception(ErrorCodes::LOGICAL_ERROR, "Converting with add_casted_columns supported only for MatchColumnsMode::Name"); diff --git a/src/Interpreters/AsynchronousInsertLog.cpp b/src/Interpreters/AsynchronousInsertLog.cpp index 5d851f6b47d..0fc39c77fb4 100644 --- a/src/Interpreters/AsynchronousInsertLog.cpp +++ b/src/Interpreters/AsynchronousInsertLog.cpp @@ -33,26 +33,26 @@ ColumnsDescription AsynchronousInsertLogElement::getColumnsDescription() }); return ColumnsDescription{ - {"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}, - {"event_date", std::make_shared<DataTypeDate>()}, - {"event_time", std::make_shared<DataTypeDateTime>()}, - {"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)}, + {"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared<DataTypeDate>(), "The date when the async insert happened."}, + {"event_time", std::make_shared<DataTypeDateTime>(), "The date and time when the async insert finished execution."}, + {"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6), "The date and time when the async insert finished execution with microseconds precision."}, - {"query", std::make_shared<DataTypeString>()}, - {"database", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}, - {"table", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}, - {"format", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}, - {"query_id", std::make_shared<DataTypeString>()}, - {"bytes", std::make_shared<DataTypeUInt64>()}, - {"rows", std::make_shared<DataTypeUInt64>()}, - {"exception", std::make_shared<DataTypeString>()}, - {"status", type_status}, - {"data_kind", type_data_kind}, + {"query", std::make_shared<DataTypeString>(), "Query string."}, + {"database", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "The name of the database the table is in."}, + {"table", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Table name."}, + {"format", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Format name."}, + {"query_id", std::make_shared<DataTypeString>(), "ID of the initial query."}, + {"bytes", std::make_shared<DataTypeUInt64>(), "Number of inserted bytes."}, + {"rows", std::make_shared<DataTypeUInt64>(), "Number of inserted rows."}, + {"exception", std::make_shared<DataTypeString>(), "Exception message."}, + {"status", type_status, "Status of the insert. Values: 'Ok' = 1 — Successful insert, 'ParsingError' = 2 — Exception when parsing the data, 'FlushError' = 3 — Exception when flushing the data"},
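The pattern repeated across all of the system-log changes in this diff is the same: each ColumnsDescription entry gains a third, human-readable comment that is surfaced to users (e.g. through system.columns). A reduced sketch of the shape of such an entry, using standalone stand-in types rather than the real ClickHouse classes:

```cpp
#include <string>
#include <vector>

// Reduced stand-in: the real entry stores a DataTypePtr, not a type name.
struct ColumnDescription
{
    std::string name;
    std::string type;
    std::string comment;   // shown to users, e.g. via system.columns
};

using ColumnsDescription = std::vector<ColumnDescription>;

ColumnsDescription getColumnsDescription()
{
    return {
        {"event_date", "Date", "The date when the async insert happened."},
        {"event_time_microseconds", "DateTime64(6)",
         "The date and time when the async insert finished execution with microseconds precision."},
    };
}

int main() { return getColumnsDescription().size() == 2 ? 0 : 1; }
```

+ {"data_kind", type_data_kind, "The kind of the data.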
Values: 'Parsed' and 'Preprocessed'."}, - {"flush_time", std::make_shared<DataTypeDateTime>()}, - {"flush_time_microseconds", std::make_shared<DataTypeDateTime64>(6)}, - {"flush_query_id", std::make_shared<DataTypeString>()}, - {"timeout_milliseconds", std::make_shared<DataTypeUInt64>()}, + {"flush_time", std::make_shared<DataTypeDateTime>(), "The date and time when the flush happened."}, + {"flush_time_microseconds", std::make_shared<DataTypeDateTime64>(6), "The date and time when the flush happened with microseconds precision."}, + {"flush_query_id", std::make_shared<DataTypeString>(), "ID of the flush query."}, + {"timeout_milliseconds", std::make_shared<DataTypeUInt64>(), "The adaptive timeout calculated for this entry."}, }; } diff --git a/src/Interpreters/BackupLog.cpp b/src/Interpreters/BackupLog.cpp index d5b69bc0728..af6c7cf6234 100644 --- a/src/Interpreters/BackupLog.cpp +++ b/src/Interpreters/BackupLog.cpp @@ -22,24 +22,24 @@ ColumnsDescription BackupLogElement::getColumnsDescription() { return ColumnsDescription { - {"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}, - {"event_date", std::make_shared<DataTypeDate>()}, - {"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)}, - {"id", std::make_shared<DataTypeString>()}, - {"name", std::make_shared<DataTypeString>()}, - {"base_backup_name", std::make_shared<DataTypeString>()}, - {"query_id", std::make_shared<DataTypeString>()}, - {"status", std::make_shared<DataTypeEnum8>(getBackupStatusEnumValues())}, - {"error", std::make_shared<DataTypeString>()}, - {"start_time", std::make_shared<DataTypeDateTime>()}, - {"end_time", std::make_shared<DataTypeDateTime>()}, - {"num_files", std::make_shared<DataTypeUInt64>()}, - {"total_size", std::make_shared<DataTypeUInt64>()}, - {"num_entries", std::make_shared<DataTypeUInt64>()}, - {"uncompressed_size", std::make_shared<DataTypeUInt64>()}, - {"compressed_size", std::make_shared<DataTypeUInt64>()}, - {"files_read", std::make_shared<DataTypeUInt64>()}, - {"bytes_read", std::make_shared<DataTypeUInt64>()}, + {"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared<DataTypeDate>(), "Date of the entry."}, + {"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6), "Time of the entry with microseconds precision."}, + {"id", std::make_shared<DataTypeString>(), "Identifier of the backup or restore operation."}, + {"name", std::make_shared<DataTypeString>(), "Name of the backup storage (the contents of the FROM or TO clause)."}, + {"base_backup_name", std::make_shared<DataTypeString>(), "The name of the base backup in case of an incremental one."}, + {"query_id", std::make_shared<DataTypeString>(), "The ID of a query associated with a backup operation."}, + {"status", std::make_shared<DataTypeEnum8>(getBackupStatusEnumValues()), "Operation status."}, + {"error", std::make_shared<DataTypeString>(), "Error message of the failed operation (empty string for successful operations)."}, + {"start_time", std::make_shared<DataTypeDateTime>(), "Start time of the operation."}, + {"end_time", std::make_shared<DataTypeDateTime>(), "End time of the operation."}, + {"num_files", std::make_shared<DataTypeUInt64>(), "Number of files stored in the backup."}, + {"total_size", std::make_shared<DataTypeUInt64>(), "Total size of files stored in the backup."}, + {"num_entries", std::make_shared<DataTypeUInt64>(), "Number of entries in the backup, i.e. the number of files inside the folder if the backup is stored as a folder, or the number of files inside the archive if the backup is stored as an archive. It is not the same as num_files if it's an incremental backup or if it contains empty files or duplicates. The following is always true: num_entries <= num_files."}, + {"uncompressed_size", std::make_shared<DataTypeUInt64>(), "Uncompressed size of the backup."}, + {"compressed_size", std::make_shared<DataTypeUInt64>(), "Compressed size of the backup.
If the backup is not stored as an archive, it equals uncompressed_size."}, + {"files_read", std::make_shared<DataTypeUInt64>(), "Number of files read during the restore operation."}, + {"bytes_read", std::make_shared<DataTypeUInt64>(), "Total size of files read during the restore operation."}, }; } diff --git a/src/Interpreters/BlobStorageLog.cpp b/src/Interpreters/BlobStorageLog.cpp index 520405374ca..f9d5b0d6790 100644 --- a/src/Interpreters/BlobStorageLog.cpp +++ b/src/Interpreters/BlobStorageLog.cpp @@ -26,23 +26,23 @@ ColumnsDescription BlobStorageLogElement::getColumnsDescription() return ColumnsDescription { - {"event_date", std::make_shared<DataTypeDate>()}, - {"event_time", std::make_shared<DataTypeDateTime>()}, - {"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)}, + {"event_date", std::make_shared<DataTypeDate>(), "Date of the event."}, + {"event_time", std::make_shared<DataTypeDateTime>(), "Time of the event."}, + {"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6), "Time of the event with microseconds precision."}, - {"event_type", event_enum_type}, + {"event_type", event_enum_type, "Type of the event. Possible values: 'Upload', 'Delete', 'MultiPartUploadCreate', 'MultiPartUploadWrite', 'MultiPartUploadComplete', 'MultiPartUploadAbort'"}, - {"query_id", std::make_shared<DataTypeString>()}, - {"thread_id", std::make_shared<DataTypeUInt64>()}, - {"thread_name", std::make_shared<DataTypeString>()}, + {"query_id", std::make_shared<DataTypeString>(), "Identifier of the query associated with the event, if any."}, + {"thread_id", std::make_shared<DataTypeUInt64>(), "Identifier of the thread performing the operation."}, + {"thread_name", std::make_shared<DataTypeString>(), "Name of the thread performing the operation."}, - {"disk_name", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}, - {"bucket", std::make_shared<DataTypeString>()}, - {"remote_path", std::make_shared<DataTypeString>()}, - {"local_path", std::make_shared<DataTypeString>()}, - {"data_size", std::make_shared<DataTypeUInt64>()}, + {"disk_name", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Name of the associated disk."}, + {"bucket", std::make_shared<DataTypeString>(), "Name of the bucket."}, + {"remote_path", std::make_shared<DataTypeString>(), "Path to the remote resource."}, + {"local_path", std::make_shared<DataTypeString>(), "Path to the metadata file on the local system, which references the remote resource."}, + {"data_size", std::make_shared<DataTypeUInt64>(), "Size of the data involved in the upload event."}, - {"error", std::make_shared<DataTypeString>()}, + {"error", std::make_shared<DataTypeString>(), "Error message associated with the event, if any."}, }; } diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index d242544f787..9c705ddc27c 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -188,6 +188,11 @@ CacheGuard::Lock FileCache::lockCache() const return cache_guard.lock(); } +CacheGuard::Lock FileCache::tryLockCache() const +{ + return cache_guard.tryLock(); +} + FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment::Range & range, size_t file_segments_limit) const { /// Given range = [left, right] and non-overlapping ordered set of file segments, diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 2de2f347999..5b665ad0271 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -173,6 +173,7 @@ public: void deactivateBackgroundOperations(); CacheGuard::Lock lockCache() const; + CacheGuard::Lock tryLockCache() const; std::vector<FileSegment::Info> sync();
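tryLockCache() gives callers a non-blocking alternative to lockCache(); the FileSegment hunk below uses it so that bumping a segment's priority in the eviction queue never stalls a read on a contended cache mutex, and the Guards.h hunk further below enables it by inheriting all of std::unique_lock's constructors (using Base::Base;), so a Lock can be built with std::try_to_lock and tested via unique_lock's operator bool. A minimal sketch of the pattern with plain standard-library types (the atomic counter stands in for the new FileSegmentFailToIncreasePriority profile event):

```cpp
#include <atomic>
#include <mutex>

std::mutex cache_mutex;
std::atomic<unsigned> fail_to_increase_priority{0};

void increasePriority()
{
    // Opportunistic locking: take the mutex only if it is free, otherwise
    // skip the priority bump and just count the missed attempt.
    std::unique_lock<std::mutex> cache_lock(cache_mutex, std::try_to_lock);
    if (cache_lock.owns_lock())
    {
        /* ... move the segment up in the eviction queue ... */
    }
    else
    {
        fail_to_increase_priority.fetch_add(1, std::memory_order_relaxed);
    }
}

int main()
{
    increasePriority();   // uncontended: takes the lock and does the work
}
```

diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 8bd89465917..e58402dac03 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -23,6 +23,7 @@ namespace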
ProfileEvents extern const Event FileSegmentWriteMicroseconds; extern const Event FileSegmentUseMicroseconds; extern const Event FileSegmentHolderCompleteMicroseconds; + extern const Event FileSegmentFailToIncreasePriority; extern const Event FilesystemCacheHoldFileSegments; extern const Event FilesystemCacheUnusedHoldFileSegments; } @@ -965,8 +966,10 @@ void FileSegment::increasePriority() auto it = getQueueIterator(); if (it) { - auto cache_lock = cache->lockCache(); - hits_count = it->increasePriority(cache_lock); + if (auto cache_lock = cache->tryLockCache()) + hits_count = it->increasePriority(cache_lock); + else + ProfileEvents::increment(ProfileEvents::FileSegmentFailToIncreasePriority); } } diff --git a/src/Interpreters/Cache/Guards.h b/src/Interpreters/Cache/Guards.h index 09586b55c61..5729620d82f 100644 --- a/src/Interpreters/Cache/Guards.h +++ b/src/Interpreters/Cache/Guards.h @@ -65,10 +65,12 @@ struct CacheGuard : private boost::noncopyable /// so, we wouldn't be able to pass CacheGuard::Lock to a function which accepts KeyGuard::Lock, for example struct Lock : public std::unique_lock<std::mutex> { - explicit Lock(std::mutex & mutex_) : std::unique_lock<std::mutex>(mutex_) {} + using Base = std::unique_lock<std::mutex>; + using Base::Base; }; Lock lock() { return Lock(mutex); } + Lock tryLock() { return Lock(mutex, std::try_to_lock); } std::mutex mutex; }; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index cdeaa46cff2..a81392cb3d8 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3270,7 +3270,7 @@ bool checkZooKeeperConfigIsLocal(const Poco::Util::AbstractConfiguration & confi if (startsWith(key, "node")) { String host = config.getString(config_name + "." + key + ".host"); - if (isLocalAddress(DNSResolver::instance().resolveHost(host))) + if (isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(host).front())) return true; } } diff --git a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp index 4fb81e4bcf7..410ea922429 100644 --- a/src/Interpreters/CrashLog.cpp +++ b/src/Interpreters/CrashLog.cpp @@ -23,18 +23,18 @@ ColumnsDescription CrashLogElement::getColumnsDescription() { return ColumnsDescription { - {"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}, - {"event_date", std::make_shared<DataTypeDate>()}, - {"event_time", std::make_shared<DataTypeDateTime>()}, - {"timestamp_ns", std::make_shared<DataTypeUInt64>()}, - {"signal", std::make_shared<DataTypeInt32>()}, - {"thread_id", std::make_shared<DataTypeUInt64>()}, - {"query_id", std::make_shared<DataTypeString>()}, - {"trace", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())}, - {"trace_full", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, - {"version", std::make_shared<DataTypeString>()}, - {"revision", std::make_shared<DataTypeUInt32>()}, - {"build_id", std::make_shared<DataTypeString>()}, + {"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "The hostname where the crash occurred."}, + {"event_date", std::make_shared<DataTypeDate>(), "The date of the crash."}, + {"event_time", std::make_shared<DataTypeDateTime>(), "The time of the crash."}, + {"timestamp_ns", std::make_shared<DataTypeUInt64>(), "Timestamp of the event with nanoseconds."}, + {"signal", std::make_shared<DataTypeInt32>(), "Signal number."}, + {"thread_id", std::make_shared<DataTypeUInt64>(), "Thread ID."}, + {"query_id", std::make_shared<DataTypeString>(), "Query ID."}, + {"trace", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>()), "Stack trace at the moment of crash. Each element is a virtual memory address inside ClickHouse server process."}, + {"trace_full", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "Stack trace at the moment of crash.
Each element contains a called method inside ClickHouse server process."}, + {"version", std::make_shared(), "ClickHouse server version."}, + {"revision", std::make_shared(), "ClickHouse server revision."}, + {"build_id", std::make_shared(), "BuildID that is generated by compiler."}, }; } diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 77a022e066b..0cf138c14f6 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -281,7 +281,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr if (data.only_analyze) { ast->as()->alias.clear(); - auto func = makeASTFunction("identity", std::move(ast)); + auto func = makeASTFunction("__scalarSubqueryResult", std::move(ast)); func->alias = subquery_alias; func->prefer_alias_to_column_name = prefer_alias_to_column_name; ast = std::move(func); diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index ccfee49a66f..80fe1c3a8ef 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -38,20 +38,20 @@ ColumnsDescription FilesystemCacheLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"query_id", std::make_shared()}, - {"source_file_path", std::make_shared()}, - {"file_segment_range", std::make_shared(types)}, - {"total_requested_range", std::make_shared(types)}, - {"key", std::make_shared()}, - {"offset", std::make_shared()}, - {"size", std::make_shared()}, - {"read_type", std::make_shared()}, - {"read_from_cache_attempted", std::make_shared()}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, - {"read_buffer_id", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname"}, + {"event_date", std::make_shared(), "Event date"}, + {"event_time", std::make_shared(), "Event time"}, + {"query_id", std::make_shared(), "Id of the query"}, + {"source_file_path", std::make_shared(), "File segment path on filesystem"}, + {"file_segment_range", std::make_shared(types), "File segment range"}, + {"total_requested_range", std::make_shared(types), "Full read range"}, + {"key", std::make_shared(), "File segment key"}, + {"offset", std::make_shared(), "File segment offset"}, + {"size", std::make_shared(), "Read size"}, + {"read_type", std::make_shared(), "Read type: READ_FROM_CACHE, READ_FROM_FS_AND_DOWNLOADED_TO_CACHE, READ_FROM_FS_BYPASSING_CACHE"}, + {"read_from_cache_attempted", std::make_shared(), "Whether reading from cache was attempted"}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected while reading this file segment"}, + {"read_buffer_id", std::make_shared(), "Internal implementation read buffer id"}, }; } diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index c53e54573c5..64b6eb5dce9 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -216,7 +216,7 @@ private: if (enable_parallel_processing_of_joins) { /// We don't enable parallel replicas for IN (subquery) - if (ast->as()) + if (!settings.parallel_replicas_allow_in_with_subquery && ast->as()) { if (settings.allow_experimental_parallel_reading_from_replicas == 1) { diff --git 
a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 0047ea1bc78..d05d8b8deb1 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -38,7 +38,6 @@ #include #include #include -#include #include #include @@ -812,24 +811,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } else { - /** To get valid sample block we need to prepare query without only_analyze, because we need to execute scalar - * subqueries. Otherwise functions that expect only constant arguments will throw error during query analysis, - * because the result of scalar subquery is not a constant. - * - * Example: - * CREATE MATERIALIZED VIEW test_mv ENGINE=MergeTree ORDER BY arr - * AS - * WITH (SELECT '\d[a-z]') AS constant_value - * SELECT extractAll(concat(toString(number), 'a'), assumeNotNull(constant_value)) AS arr - * FROM test_table; - * - * For new analyzer this issue does not exists because we always execute scalar subqueries. - * We can improve this in new analyzer, and execute scalar subqueries only in contexts when we expect constant - * for example: LIMIT, OFFSET, functions parameters, functions constant only arguments. - */ - - InterpreterSelectWithUnionQuery interpreter(create.select->clone(), getContext(), SelectQueryOptions()); - as_select_sample = interpreter.getSampleBlock(); + as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), getContext()); } properties.columns = ColumnsDescription(as_select_sample.getNamesAndTypesList()); @@ -894,24 +876,6 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column {} already exists", backQuoteIfNeed(column.name)); } - /// Check if _row_exists for lightweight delete column in column_lists for merge tree family. 
- if (create.storage && create.storage->engine && endsWith(create.storage->engine->name, "MergeTree")) - { - auto search = all_columns.find(LightweightDeleteDescription::FILTER_COLUMN.name); - if (search != all_columns.end()) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column '{}' for *MergeTree engines because it " - "is reserved for lightweight delete feature", - LightweightDeleteDescription::FILTER_COLUMN.name); - - auto search_block_number = all_columns.find(BlockNumberColumn::name); - if (search_block_number != all_columns.end()) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column '{}' for *MergeTree engines because it " - "is reserved for storing block number", - BlockNumberColumn::name); - } - const auto & settings = getContext()->getSettingsRef(); /// If it's not attach and not materialized view to existing table, @@ -924,9 +888,23 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat } } +void validateVirtualColumns(const IStorage & storage) +{ + auto virtual_columns = storage.getVirtualsPtr(); + for (const auto & storage_column : storage.getInMemoryMetadataPtr()->getColumns()) + { + if (virtual_columns->tryGet(storage_column.name, VirtualsKind::Persistent)) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot create table with column '{}' for {} engines because it is reserved for persistent virtual column", + storage_column.name, storage.getName()); + } + } +} + namespace { - void checkTemporaryTableEngineName(const String& name) + void checkTemporaryTableEngineName(const String & name) { if (name.starts_with("Replicated") || name.starts_with("Shared") || name == "KeeperMap") throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated, Shared or KeeperMap table engines"); @@ -1246,7 +1224,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) { input_block = InterpreterSelectWithUnionQuery(create.select->clone(), getContext(), - {}).getSampleBlock(); + SelectQueryOptions().analyze()).getSampleBlock(); } Block output_block = to_table->getInMemoryMetadataPtr()->getSampleBlock(); @@ -1509,6 +1487,16 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, addColumnsDescriptionToCreateQueryIfNecessary(query_ptr->as(), res); } + validateVirtualColumns(*res); + + if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot create table with column of type Object, " + "because storage {} doesn't support dynamic subcolumns", + res->getName()); + } + if (!create.attach && getContext()->getSettingsRef().database_replicated_allow_only_replicated_engine) { bool is_replicated_storage = typeid_cast(res.get()) != nullptr; @@ -1558,14 +1546,6 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, /// we can safely destroy the object without a call to "shutdown", because there is guarantee /// that no background threads/similar resources remain after exception from "startup". 
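The per-engine reserved-column checks deleted here are replaced by the generic validateVirtualColumns pass added above: any user-declared column that collides with a persistent virtual column of the chosen engine is rejected, whatever the engine. A self-contained sketch of the rule, with hypothetical names standing in for the storage interfaces:

```cpp
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical reserved set; a real engine reports its own persistent
// virtual columns (e.g. _row_exists, _block_number for *MergeTree).
const std::set<std::string> persistent_virtuals{"_row_exists", "_block_number"};

void validateVirtualColumns(const std::vector<std::string> & table_columns)
{
    for (const auto & name : table_columns)
        if (persistent_virtuals.count(name))
            throw std::invalid_argument(
                "Cannot create table with column '" + name
                + "' because it is reserved for a persistent virtual column");
}

int main()
{
    validateVirtualColumns({"id", "value"});      // fine
    try
    {
        validateVirtualColumns({"_row_exists"});  // throws
    }
    catch (const std::invalid_argument &) {}
}
```
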
- if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column of type Object, " - "because storage {} doesn't support dynamic subcolumns", - res->getName()); - } - res->startup(); return true; } diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 5c13a1145d1..97ae9649ae8 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -15,7 +15,6 @@ #include #include #include -#include namespace DB diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 1aab72afcc1..04d44e34fff 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -123,28 +123,29 @@ BlockIO InterpreterDescribeQuery::execute() void InterpreterDescribeQuery::fillColumnsFromSubquery(const ASTTableExpression & table_expression) { - NamesAndTypesList names_and_types; + Block sample_block; auto select_query = table_expression.subquery->children.at(0); auto current_context = getContext(); if (settings.allow_experimental_analyzer) { SelectQueryOptions select_query_options; - names_and_types = InterpreterSelectQueryAnalyzer(select_query, current_context, select_query_options).getSampleBlock().getNamesAndTypesList(); + sample_block = InterpreterSelectQueryAnalyzer(select_query, current_context, select_query_options).getSampleBlock(); } else { - names_and_types = InterpreterSelectWithUnionQuery::getSampleBlock(select_query, current_context).getNamesAndTypesList(); + sample_block = InterpreterSelectWithUnionQuery::getSampleBlock(select_query, current_context); } - for (auto && [name, type] : names_and_types) - columns.emplace_back(std::move(name), std::move(type)); + for (auto && column : sample_block) + columns.emplace_back(std::move(column.name), std::move(column.type)); } void InterpreterDescribeQuery::fillColumnsFromTableFunction(const ASTTableExpression & table_expression) { auto current_context = getContext(); TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression.table_function, current_context); + auto column_descriptions = table_function_ptr->getActualTableStructure(getContext(), /*is_insert_query*/ true); for (const auto & column : column_descriptions) columns.emplace_back(column); @@ -154,14 +155,16 @@ void InterpreterDescribeQuery::fillColumnsFromTableFunction(const ASTTableExpres auto table = table_function_ptr->execute(table_expression.table_function, getContext(), table_function_ptr->getName()); if (table) { - for (const auto & column : table->getVirtuals()) + auto virtuals = table->getVirtualsPtr(); + for (const auto & column : *virtuals) { if (!column_descriptions.has(column.name)) - virtual_columns.emplace_back(column.name, column.type); + virtual_columns.push_back(column); } } } } + void InterpreterDescribeQuery::fillColumnsFromTable(const ASTTableExpression & table_expression) { auto table_id = getContext()->resolveStorageID(table_expression.database_and_table_name); @@ -176,10 +179,11 @@ void InterpreterDescribeQuery::fillColumnsFromTable(const ASTTableExpression & t if (settings.describe_include_virtual_columns) { - for (const auto & column : table->getVirtuals()) + auto virtuals = table->getVirtualsPtr(); + for (const auto & column : *virtuals) { if (!column_descriptions.has(column.name)) - 
virtual_columns.emplace_back(column.name, column.type); + virtual_columns.push_back(column); } } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 5680857ed3d..70f9e0c51da 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -136,7 +136,7 @@ Block InterpreterInsertQuery::getSampleBlock( if (auto * window_view = dynamic_cast(table.get())) return window_view->getInputHeader(); else if (no_destination) - return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals()); + return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtualsList()); else return metadata_snapshot->getSampleBlockNonMaterialized(); } diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index 4897101d80b..922f4a99b4a 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -103,7 +103,7 @@ QueryTreeNodePtr buildQueryTreeAndRunPasses(const ASTPtr & query, auto query_tree = buildQueryTree(query, context); QueryTreePassManager query_tree_pass_manager(context); - addQueryTreePasses(query_tree_pass_manager); + addQueryTreePasses(query_tree_pass_manager, select_query_options.only_analyze); /// We should not apply any query tree level optimizations on shards /// because it can lead to a changed header. diff --git a/src/Interpreters/InterpreterShowColumnsQuery.cpp b/src/Interpreters/InterpreterShowColumnsQuery.cpp index 149ba6d7575..f32ebceaa63 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.cpp +++ b/src/Interpreters/InterpreterShowColumnsQuery.cpp @@ -107,7 +107,7 @@ SELECT '' AS extra )"; // TODO Interpret query.extended. It is supposed to show internal/virtual columns. Need to fetch virtual column names, see - // IStorage::getVirtuals(). We can't easily do that via SQL. + // IStorage::getVirtualsList(). We can't easily do that via SQL. 
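Both DESCRIBE paths above now pull virtual columns through getVirtualsPtr() and append only those not shadowed by a physical column of the same name. The merge rule, reduced to a standalone sketch with simplified, hypothetical types:

```cpp
#include <string>
#include <unordered_set>
#include <vector>

struct ColumnDesc { std::string name; std::string type; };

// Append virtual columns that are not shadowed by physical ones.
std::vector<ColumnDesc> withVirtuals(std::vector<ColumnDesc> columns,
                                     const std::vector<ColumnDesc> & virtuals)
{
    std::unordered_set<std::string> seen;
    for (const auto & c : columns)
        seen.insert(c.name);
    for (const auto & v : virtuals)
        if (!seen.count(v.name))
            columns.push_back(v);
    return columns;
}

int main()
{
    auto cols = withVirtuals({{"key", "UInt64"}},
                             {{"_part", "String"}, {"key", "UInt64"}});
    return cols.size() == 2 ? 0 : 1;   // "_part" added, duplicate "key" skipped
}
```

This skip-if-present rule is the same one the describe_include_virtual_columns setting relies on in fillColumnsFromTable.
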
if (query.full) { diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 9be8bf178a1..49693332280 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -272,7 +272,7 @@ void JoinedTables::makeFakeTable(StoragePtr storage, const StorageMetadataPtr & auto & table = tables_with_columns.back(); table.addHiddenColumns(storage_columns.getMaterialized()); table.addHiddenColumns(storage_columns.getAliases()); - table.addHiddenColumns(storage->getVirtuals()); + table.addHiddenColumns(storage->getVirtualsList()); } else tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, source_header.getNamesAndTypesList()); diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index a3d1b84fdc1..3b1a499255b 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -265,7 +264,7 @@ MutationCommand createCommandToApplyDeletedMask(const MutationCommand & command) alter_command->partition = alter_command->children.emplace_back(command.partition).get(); auto row_exists_predicate = makeASTFunction("equals", - std::make_shared(LightweightDeleteDescription::FILTER_COLUMN.name), + std::make_shared(RowExistsColumn::name), std::make_shared(Field(0))); if (command.predicate) @@ -350,7 +349,8 @@ bool MutationsInterpreter::Source::isCompactPart() const static Names getAvailableColumnsWithVirtuals(StorageMetadataPtr metadata_snapshot, const IStorage & storage) { auto all_columns = metadata_snapshot->getColumns().getNamesOfPhysical(); - for (const auto & column : storage.getVirtuals()) + auto virtuals = storage.getVirtualsPtr(); + for (const auto & column : *virtuals) all_columns.push_back(column.name); return all_columns; } @@ -435,60 +435,54 @@ static NameSet getKeyColumns(const MutationsInterpreter::Source & source, const static void validateUpdateColumns( const MutationsInterpreter::Source & source, - const StorageMetadataPtr & metadata_snapshot, const NameSet & updated_columns, - const std::unordered_map & column_to_affected_materialized) + const StorageMetadataPtr & metadata_snapshot, + const NameSet & updated_columns, + const std::unordered_map & column_to_affected_materialized, + const ContextPtr & context) { + auto storage_snapshot = source.getStorageSnapshot(metadata_snapshot, context); NameSet key_columns = getKeyColumns(source, metadata_snapshot); - for (const String & column_name : updated_columns) + const auto & storage_columns = storage_snapshot->metadata->getColumns(); + const auto & virtual_columns = *storage_snapshot->virtual_columns; + + for (const auto & column_name : updated_columns) { - auto found = false; - for (const auto & col : metadata_snapshot->getColumns().getOrdinary()) - { - if (col.name == column_name) - { - found = true; - break; - } - } - - /// Allow to override value of lightweight delete filter virtual column - if (!found && column_name == LightweightDeleteDescription::FILTER_COLUMN.name) - { - if (!source.supportsLightweightDelete()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); - found = true; - } - - /// Dont allow to override value of block number virtual column - if (!found && column_name == BlockNumberColumn::name) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Update is not supported for 
virtual column {} ", backQuote(column_name)); - } - - if (!found) - { - for (const auto & col : metadata_snapshot->getColumns().getMaterialized()) - { - if (col.name == column_name) - throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE materialized column {}", backQuote(column_name)); - } - - throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", backQuote(column_name)); - } - if (key_columns.contains(column_name)) throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE key column {}", backQuote(column_name)); + if (storage_columns.tryGetColumn(GetColumnsOptions::Materialized, column_name)) + throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE materialized column {}", backQuote(column_name)); + auto materialized_it = column_to_affected_materialized.find(column_name); if (materialized_it != column_to_affected_materialized.end()) { - for (const String & materialized : materialized_it->second) + for (const auto & materialized : materialized_it->second) { if (key_columns.contains(materialized)) + { throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Updated column {} affects MATERIALIZED column {}, which is a key column. " "Cannot UPDATE it.", backQuote(column_name), backQuote(materialized)); + } + } + } + + if (!storage_columns.tryGetColumn(GetColumnsOptions::Ordinary, column_name)) + { + /// Allow to override value of lightweight delete filter virtual column + if (column_name == RowExistsColumn::name) + { + if (!source.supportsLightweightDelete()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); + } + else if (virtual_columns.tryGet(column_name)) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Update is not supported for virtual column {} ", backQuote(column_name)); + } + else + { + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", backQuote(column_name)); } } } @@ -546,8 +540,8 @@ void MutationsInterpreter::prepare(bool dry_run) /// Add _row_exists column if it is physically present in the part if (source.hasLightweightDeleteMask()) { - all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); - available_columns_set.insert(LightweightDeleteDescription::FILTER_COLUMN.name); + all_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type); + available_columns_set.insert(RowExistsColumn::name); } NameSet updated_columns; @@ -563,9 +557,7 @@ void MutationsInterpreter::prepare(bool dry_run) for (const auto & [name, _] : command.column_to_update_expression) { - if (!available_columns_set.contains(name) - && name != LightweightDeleteDescription::FILTER_COLUMN.name - && name != BlockNumberColumn::name) + if (!available_columns_set.contains(name) && name != RowExistsColumn::name) throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Column {} is updated but not requested to read", name); @@ -590,7 +582,7 @@ void MutationsInterpreter::prepare(bool dry_run) } } - validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized); + validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized, context); } StorageInMemoryMetadata::HasDependencyCallback has_dependency = @@ -666,15 +658,11 @@ void MutationsInterpreter::prepare(bool dry_run) { type = physical_column->type; } - else if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name) + else if (column_name == RowExistsColumn::name) { - type = LightweightDeleteDescription::FILTER_COLUMN.type; + type = 
RowExistsColumn::type; deleted_mask_updated = true; } - else if (column_name == BlockNumberColumn::name) - { - type = BlockNumberColumn::type; - } else { throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown column {}", column_name); @@ -1028,7 +1016,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s /// Add _row_exists column if it is present in the part if (source.hasLightweightDeleteMask() || deleted_mask_updated) - all_columns.push_back(LightweightDeleteDescription::FILTER_COLUMN); + all_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type); bool has_filters = false; /// Next, for each stage calculate columns changed by this and previous stages. @@ -1038,7 +1026,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s { for (const auto & column : all_columns) { - if (column.name == LightweightDeleteDescription::FILTER_COLUMN.name && !deleted_mask_updated) + if (column.name == RowExistsColumn::name && !deleted_mask_updated) continue; prepared_stages[i].output_columns.insert(column.name); @@ -1057,7 +1045,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s /// and so it is not in the list of AllPhysical columns. for (const auto & [column_name, _] : prepared_stages[i].column_to_updated) { - if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && has_filters && !deleted_mask_updated) + if (column_name == RowExistsColumn::name && has_filters && !deleted_mask_updated) continue; prepared_stages[i].output_columns.insert(column_name); @@ -1148,93 +1136,6 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s } } -/// This structure re-implements adding virtual columns while reading from MergeTree part. -/// It would be good to unify it with IMergeTreeSelectAlgorithm. 
-struct VirtualColumns -{ - struct ColumnAndPosition - { - ColumnWithTypeAndName column; - size_t position; - }; - - using Columns = std::vector; - - Columns virtuals; - Names columns_to_read; - - VirtualColumns(Names required_columns, const MergeTreeData::DataPartPtr & part) : columns_to_read(std::move(required_columns)) - { - for (size_t i = 0; i < columns_to_read.size(); ++i) - { - if (columns_to_read[i] == LightweightDeleteDescription::FILTER_COLUMN.name) - { - if (!part->getColumns().contains(LightweightDeleteDescription::FILTER_COLUMN.name)) - { - ColumnWithTypeAndName mask_column; - mask_column.type = LightweightDeleteDescription::FILTER_COLUMN.type; - mask_column.column = mask_column.type->createColumnConst(0, 1); - mask_column.name = std::move(columns_to_read[i]); - - virtuals.emplace_back(ColumnAndPosition{.column = std::move(mask_column), .position = i}); - } - } - else if (columns_to_read[i] == "_partition_id") - { - ColumnWithTypeAndName column; - column.type = std::make_shared(); - column.column = column.type->createColumnConst(0, part->info.partition_id); - column.name = std::move(columns_to_read[i]); - - virtuals.emplace_back(ColumnAndPosition{.column = std::move(column), .position = i}); - } - else if (columns_to_read[i] == BlockNumberColumn::name) - { - if (!part->getColumns().contains(BlockNumberColumn::name)) - { - ColumnWithTypeAndName block_number_column; - block_number_column.type = BlockNumberColumn::type; - block_number_column.column = block_number_column.type->createColumnConst(0, part->info.min_block); - block_number_column.name = std::move(columns_to_read[i]); - - virtuals.emplace_back(ColumnAndPosition{.column = std::move(block_number_column), .position = i}); - } - } - } - - if (!virtuals.empty()) - { - Names columns_no_virtuals; - columns_no_virtuals.reserve(columns_to_read.size()); - size_t next_virtual = 0; - for (size_t i = 0; i < columns_to_read.size(); ++i) - { - if (next_virtual < virtuals.size() && i == virtuals[next_virtual].position) - ++next_virtual; - else - columns_no_virtuals.emplace_back(std::move(columns_to_read[i])); - } - - columns_to_read.swap(columns_no_virtuals); - } - } - - void addVirtuals(QueryPlan & plan) - { - auto dag = std::make_unique(plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); - - for (auto & column : virtuals) - { - const auto & adding_const = dag->addColumn(std::move(column.column)); - auto & outputs = dag->getOutputs(); - outputs.insert(outputs.begin() + column.position, &adding_const); - } - - auto step = std::make_unique(plan.getCurrentDataStream(), std::move(dag)); - plan.addStep(std::move(step)); - } -}; - void MutationsInterpreter::Source::read( Stage & first_stage, QueryPlan & plan, @@ -1277,16 +1178,12 @@ void MutationsInterpreter::Source::read( filter = ActionsDAG::buildFilterActionsDAG(nodes); } - VirtualColumns virtual_columns(std::move(required_columns), part); - createReadFromPartStep( MergeTreeSequentialSourceType::Mutation, - plan, *data, storage_snapshot, part, - std::move(virtual_columns.columns_to_read), + plan, *data, storage_snapshot, + part, required_columns, apply_deleted_mask_, filter, context_, getLogger("MutationsInterpreter")); - - virtual_columns.addVirtuals(plan); } else { diff --git a/src/Interpreters/OpenTelemetrySpanLog.cpp b/src/Interpreters/OpenTelemetrySpanLog.cpp index fffc1e50da0..aa11749f8a6 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.cpp +++ b/src/Interpreters/OpenTelemetrySpanLog.cpp @@ -32,12 +32,17 @@ ColumnsDescription 
OpenTelemetrySpanLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", low_cardinality_string}, - {"trace_id", std::make_shared()}, - {"span_id", std::make_shared()}, - {"parent_span_id", std::make_shared()}, - {"operation_name", low_cardinality_string}, - {"kind", std::move(span_kind_type)}, + {"hostname", low_cardinality_string, "The hostname where this span was captured."}, + {"trace_id", std::make_shared(), "ID of the trace for executed query."}, + {"span_id", std::make_shared(), "ID of the trace span."}, + {"parent_span_id", std::make_shared(), "ID of the parent trace span."}, + {"operation_name", low_cardinality_string, "The name of the operation."}, + {"kind", std::move(span_kind_type), "The SpanKind of the span. " + "INTERNAL — Indicates that the span represents an internal operation within an application. " + "SERVER — Indicates that the span covers server-side handling of a synchronous RPC or other remote request. " + "CLIENT — Indicates that the span describes a request to some remote service. " + "PRODUCER — Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts. " + "CONSUMER - Indicates that the span describes a child of an asynchronous PRODUCER request."}, // DateTime64 is really unwieldy -- there is no "normal" way to convert // it to an UInt64 count of microseconds, except: // 1) reinterpretAsUInt64(reinterpretAsFixedString(date)), which just @@ -48,10 +53,10 @@ ColumnsDescription OpenTelemetrySpanLogElement::getColumnsDescription() // Also subtraction of two DateTime64 points doesn't work, so you can't // get duration. // It is much less hassle to just use UInt64 of microseconds. - {"start_time_us", std::make_shared()}, - {"finish_time_us", std::make_shared()}, - {"finish_date", std::make_shared()}, - {"attribute", std::make_shared(low_cardinality_string, std::make_shared())}, + {"start_time_us", std::make_shared(), "The start time of the trace span (in microseconds)."}, + {"finish_time_us", std::make_shared(), "The finish time of the trace span (in microseconds)."}, + {"finish_date", std::make_shared(), "The finish date of the trace span."}, + {"attribute", std::make_shared(low_cardinality_string, std::make_shared()), "Attribute depending on the trace span. 
They are filled in according to the recommendations in the OpenTelemetry standard."}, }; } diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 13b6311a877..f3504f3f403 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -53,7 +53,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v } } } - else if (function->name == "toUInt8" || function->name == "toInt8" || function->name == "identity") + else if (function->name == "toUInt8" || function->name == "toInt8" || function->name == "identity" || function->name == "__scalarSubqueryResult") { if (const auto * expr_list = function->arguments->as()) { diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index a7f20a06785..66f933f1afa 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -123,7 +123,7 @@ ColumnsDescription PartLogElement::getColumnsDescription() {"table_uuid", std::make_shared(), "UUID of the table the data part belongs to."}, {"part_name", std::make_shared(), "Name of the data part."}, {"partition_id", std::make_shared(), "ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`."}, - {"partition", std::make_shared()}, + {"partition", std::make_shared(), "The partition name."}, {"part_type", std::make_shared(), "The type of the part. Possible values: Wide and Compact."}, {"disk_name", std::make_shared(), "The disk name data part lies on."}, {"path_on_disk", std::make_shared(), "Absolute path to the folder with data part files."}, diff --git a/src/Interpreters/ProcessorsProfileLog.cpp b/src/Interpreters/ProcessorsProfileLog.cpp index 088d193257c..015b4abc712 100644 --- a/src/Interpreters/ProcessorsProfileLog.cpp +++ b/src/Interpreters/ProcessorsProfileLog.cpp @@ -21,26 +21,26 @@ ColumnsDescription ProcessorProfileLogElement::getColumnsDescription() { return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "The date when the event happened."}, + {"event_time", std::make_shared(), "The date and time when the event happened."}, + {"event_time_microseconds", std::make_shared(6), "The date and time with microseconds precision when the event happened."}, - {"id", std::make_shared()}, - {"parent_ids", std::make_shared(std::make_shared())}, - {"plan_step", std::make_shared()}, - {"plan_group", std::make_shared()}, + {"id", std::make_shared(), "ID of processor."}, + {"parent_ids", std::make_shared(std::make_shared()), "Parent processors IDs."}, + {"plan_step", std::make_shared(), "ID of the query plan step which created this processor. The value is zero if the processor was not added from any step."}, + {"plan_group", std::make_shared(), "Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. 
Group is used only for beautifying the result of EXPLAIN PIPELINE result."}, - {"initial_query_id", std::make_shared()}, - {"query_id", std::make_shared()}, - {"name", std::make_shared(std::make_shared())}, - {"elapsed_us", std::make_shared()}, - {"input_wait_elapsed_us", std::make_shared()}, - {"output_wait_elapsed_us", std::make_shared()}, - {"input_rows", std::make_shared()}, - {"input_bytes", std::make_shared()}, - {"output_rows", std::make_shared()}, - {"output_bytes", std::make_shared()}, + {"initial_query_id", std::make_shared(), "ID of the initial query (for distributed query execution)."}, + {"query_id", std::make_shared(), "ID of the query."}, + {"name", std::make_shared(std::make_shared()), "Name of the processor."}, + {"elapsed_us", std::make_shared(), "Number of microseconds this processor was executed."}, + {"input_wait_elapsed_us", std::make_shared(), "Number of microseconds this processor was waiting for data (from other processor)."}, + {"output_wait_elapsed_us", std::make_shared(), "Number of microseconds this processor was waiting because output port was full."}, + {"input_rows", std::make_shared(), "The number of rows consumed by processor."}, + {"input_bytes", std::make_shared(), "The number of bytes consumed by processor."}, + {"output_rows", std::make_shared(), "The number of rows generated by processor."}, + {"output_bytes", std::make_shared(), "The number of bytes generated by processor."}, }; } diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index ad6e344655b..92f8ddae141 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -134,13 +134,13 @@ ColumnsDescription QueryLogElement::getColumnsDescription() {"used_storages", array_low_cardinality_string, "Canonical names of storages, which were used during query execution."}, {"used_table_functions", array_low_cardinality_string, "Canonical names of table functions, which were used during query execution."}, - {"used_row_policies", array_low_cardinality_string}, + {"used_row_policies", array_low_cardinality_string, "The list of row policies names that were used during query execution."}, - {"transaction_id", getTransactionIDDataType()}, + {"transaction_id", getTransactionIDDataType(), "The identifier of the transaction in scope of which this query was executed."}, {"query_cache_usage", std::move(query_cache_usage_datatype), "Usage of the query cache during query execution. 
Values: 'Unknown' = Status unknown, 'None' = The query result was neither written into nor read from the query cache, 'Write' = The query result was written into the query cache, 'Read' = The query result was read from the query cache."}, - {"asynchronous_read_counters", std::make_shared(low_cardinality_string, std::make_shared())}, + {"asynchronous_read_counters", std::make_shared(low_cardinality_string, std::make_shared()), "Metrics for asynchronous reading."}, }; } diff --git a/src/Interpreters/QueryThreadLog.cpp b/src/Interpreters/QueryThreadLog.cpp index d153e30a4ce..f50458745b9 100644 --- a/src/Interpreters/QueryThreadLog.cpp +++ b/src/Interpreters/QueryThreadLog.cpp @@ -28,58 +28,58 @@ ColumnsDescription QueryThreadLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", low_cardinality_string}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, - {"query_start_time", std::make_shared()}, - {"query_start_time_microseconds", std::make_shared(6)}, - {"query_duration_ms", std::make_shared()}, + {"hostname", low_cardinality_string, "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "The date when the thread has finished execution of the query."}, + {"event_time", std::make_shared(), "The date and time when the thread has finished execution of the query."}, + {"event_time_microseconds", std::make_shared(6), "The date and time when the thread has finished execution of the query with microseconds precision."}, + {"query_start_time", std::make_shared(), "Start time of query execution."}, + {"query_start_time_microseconds", std::make_shared(6), "Start time of query execution with microsecond precision."}, + {"query_duration_ms", std::make_shared(), "Duration of query execution."}, - {"read_rows", std::make_shared()}, - {"read_bytes", std::make_shared()}, - {"written_rows", std::make_shared()}, - {"written_bytes", std::make_shared()}, - {"memory_usage", std::make_shared()}, - {"peak_memory_usage", std::make_shared()}, + {"read_rows", std::make_shared(), "Number of read rows."}, + {"read_bytes", std::make_shared(), "Number of read bytes."}, + {"written_rows", std::make_shared(), "For INSERT queries, the number of written rows. For other queries, the column value is 0."}, + {"written_bytes", std::make_shared(), "For INSERT queries, the number of written bytes. 
For other queries, the column value is 0."}, + {"memory_usage", std::make_shared<DataTypeInt64>(), "The difference between the amount of allocated and freed memory in context of this thread."}, + {"peak_memory_usage", std::make_shared<DataTypeInt64>(), "The maximum difference between the amount of allocated and freed memory in context of this thread."}, - {"thread_name", low_cardinality_string}, - {"thread_id", std::make_shared<DataTypeUInt64>()}, - {"master_thread_id", std::make_shared<DataTypeUInt64>()}, - {"current_database", low_cardinality_string}, - {"query", std::make_shared<DataTypeString>()}, - {"normalized_query_hash", std::make_shared<DataTypeUInt64>()}, + {"thread_name", low_cardinality_string, "Name of the thread."}, + {"thread_id", std::make_shared<DataTypeUInt64>(), "Internal thread ID."}, + {"master_thread_id", std::make_shared<DataTypeUInt64>(), "OS initial ID of initial thread."}, + {"current_database", low_cardinality_string, "Name of the current database."}, + {"query", std::make_shared<DataTypeString>(), "Query string."}, + {"normalized_query_hash", std::make_shared<DataTypeUInt64>(), "The hash of normalized query - with wiped constants, etc."}, - {"is_initial_query", std::make_shared<DataTypeUInt8>()}, - {"user", low_cardinality_string}, - {"query_id", std::make_shared<DataTypeString>()}, - {"address", DataTypeFactory::instance().get("IPv6")}, - {"port", std::make_shared<DataTypeUInt16>()}, - {"initial_user", low_cardinality_string}, - {"initial_query_id", std::make_shared<DataTypeString>()}, - {"initial_address", DataTypeFactory::instance().get("IPv6")}, - {"initial_port", std::make_shared<DataTypeUInt16>()}, - {"initial_query_start_time", std::make_shared<DataTypeDateTime>()}, - {"initial_query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)}, - {"interface", std::make_shared<DataTypeUInt8>()}, - {"is_secure", std::make_shared<DataTypeUInt8>()}, - {"os_user", low_cardinality_string}, - {"client_hostname", low_cardinality_string}, - {"client_name", low_cardinality_string}, - {"client_revision", std::make_shared<DataTypeUInt32>()}, - {"client_version_major", std::make_shared<DataTypeUInt32>()}, - {"client_version_minor", std::make_shared<DataTypeUInt32>()}, - {"client_version_patch", std::make_shared<DataTypeUInt32>()}, - {"http_method", std::make_shared<DataTypeUInt8>()}, - {"http_user_agent", low_cardinality_string}, - {"http_referer", std::make_shared<DataTypeString>()}, - {"forwarded_for", std::make_shared<DataTypeString>()}, - {"quota_key", std::make_shared<DataTypeString>()}, - {"distributed_depth", std::make_shared<DataTypeUInt64>()}, + {"is_initial_query", std::make_shared<DataTypeUInt8>(), "Query type. Possible values: 1 — Query was initiated by the client, 0 — Query was initiated by another query for distributed query execution."}, + {"user", low_cardinality_string, "Name of the user who initiated the current query."}, + {"query_id", std::make_shared<DataTypeString>(), "ID of the query."}, + {"address", DataTypeFactory::instance().get("IPv6"), "IP address that was used to make the query."}, + {"port", std::make_shared<DataTypeUInt16>(), "The client port that was used to make the query."}, + {"initial_user", low_cardinality_string, "Name of the user who ran the initial query (for distributed query execution)."}, + {"initial_query_id", std::make_shared<DataTypeString>(), "ID of the initial query (for distributed query execution)."}, + {"initial_address", DataTypeFactory::instance().get("IPv6"), "IP address that the parent query was launched from."}, + {"initial_port", std::make_shared<DataTypeUInt16>(), "The client port that was used to make the parent query."}, + {"initial_query_start_time", std::make_shared<DataTypeDateTime>(), "Start time of the initial query execution."}, + {"initial_query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6), "Start time of the initial query execution with microseconds precision."},
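normalized_query_hash groups queries that differ only in literal values. A toy illustration of the idea (ClickHouse's real normalization handles string literals, keywords and whitespace far more carefully; this scheme is purely for illustration):

```cpp
#include <cctype>
#include <functional>
#include <string>

// Toy normalization: collapse numeric literals so queries that differ only
// in constants produce the same hash (hypothetical scheme).
std::string normalize(const std::string & query)
{
    std::string out;
    bool in_number = false;
    for (char c : query)
    {
        if (std::isdigit(static_cast<unsigned char>(c)))
        {
            if (!in_number) { out += '?'; in_number = true; }
        }
        else
        {
            in_number = false;
            out += c;
        }
    }
    return out;
}

int main()
{
    // Both queries normalize to "SELECT ?" and hash identically.
    return std::hash<std::string>{}(normalize("SELECT 1"))
        == std::hash<std::string>{}(normalize("SELECT 42")) ? 0 : 1;
}
```

+ {"interface", std::make_shared<DataTypeUInt8>(), "Interface that the query was initiated from.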
Possible values: 1 — TCP, 2 — HTTP."}, + {"is_secure", std::make_shared(), "The flag which shows whether the connection was secure."}, + {"os_user", low_cardinality_string, "OSs username who runs clickhouse-client."}, + {"client_hostname", low_cardinality_string, "Hostname of the client machine where the clickhouse-client or another TCP client is run."}, + {"client_name", low_cardinality_string, "The clickhouse-client or another TCP client name."}, + {"client_revision", std::make_shared(), "Revision of the clickhouse-client or another TCP client."}, + {"client_version_major", std::make_shared(), "Major version of the clickhouse-client or another TCP client."}, + {"client_version_minor", std::make_shared(), "Minor version of the clickhouse-client or another TCP client."}, + {"client_version_patch", std::make_shared(), "Patch component of the clickhouse-client or another TCP client version."}, + {"http_method", std::make_shared(), "HTTP method that initiated the query. Possible values: 0 — The query was launched from the TCP interface, 1 — GET method was used., 2 — POST method was used."}, + {"http_user_agent", low_cardinality_string, "The UserAgent header passed in the HTTP request."}, + {"http_referer", std::make_shared(), "HTTP header `Referer` passed in the HTTP query (contains an absolute or partial address of the page making the query)."}, + {"forwarded_for", std::make_shared(), "HTTP header `X-Forwarded-For` passed in the HTTP query."}, + {"quota_key", std::make_shared(), "The 'quota key' specified in the quotas setting."}, + {"distributed_depth", std::make_shared(), "How many times a query was forwarded between servers."}, - {"revision", std::make_shared()}, + {"revision", std::make_shared(), "ClickHouse revision."}, - {"ProfileEvents", std::make_shared(low_cardinality_string, std::make_shared())}, + {"ProfileEvents", std::make_shared(low_cardinality_string, std::make_shared()), "ProfileEvents that measure different metrics for this thread. 
The description of them could be found in the table system.events."}, }; } diff --git a/src/Interpreters/QueryViewsLog.cpp b/src/Interpreters/QueryViewsLog.cpp index c426f2d3cf0..a5441363340 100644 --- a/src/Interpreters/QueryViewsLog.cpp +++ b/src/Interpreters/QueryViewsLog.cpp @@ -35,30 +35,34 @@ ColumnsDescription QueryViewsLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, - {"view_duration_ms", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "The date when the last event of the view happened."}, + {"event_time", std::make_shared(), "The date and time when the view finished execution."}, + {"event_time_microseconds", std::make_shared(6), "The date and time when the view finished execution with microseconds precision."}, + {"view_duration_ms", std::make_shared(), "Duration of view execution (sum of its stages) in milliseconds."}, - {"initial_query_id", std::make_shared()}, - {"view_name", std::make_shared()}, - {"view_uuid", std::make_shared()}, - {"view_type", std::move(view_type_datatype)}, - {"view_query", std::make_shared()}, - {"view_target", std::make_shared()}, + {"initial_query_id", std::make_shared(), "ID of the initial query (for distributed query execution)."}, + {"view_name", std::make_shared(), "Name of the view."}, + {"view_uuid", std::make_shared(), "UUID of the view."}, + {"view_type", std::move(view_type_datatype), "Type of the view. Values: 'Default' = 1 — Default views. Should not appear in this log, 'Materialized' = 2 — Materialized views, 'Live' = 3 — Live views."}, + {"view_query", std::make_shared(), "The query executed by the view."}, + {"view_target", std::make_shared(), "The name of the view target table."}, - {"read_rows", std::make_shared()}, - {"read_bytes", std::make_shared()}, - {"written_rows", std::make_shared()}, - {"written_bytes", std::make_shared()}, - {"peak_memory_usage", std::make_shared()}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, + {"read_rows", std::make_shared(), "Number of read rows."}, + {"read_bytes", std::make_shared(), "Number of read bytes."}, + {"written_rows", std::make_shared(), "Number of written rows."}, + {"written_bytes", std::make_shared(), "Number of written bytes."}, + {"peak_memory_usage", std::make_shared(), "The maximum difference between the amount of allocated and freed memory in context of this view."}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "ProfileEvents that measure different metrics. The description of them could be found in the table system.events."}, - {"status", std::move(view_status_datatype)}, - {"exception_code", std::make_shared()}, - {"exception", std::make_shared()}, - {"stack_trace", std::make_shared()} + {"status", std::move(view_status_datatype), "Status of the view. Values: " + "'QueryStart' = 1 — Successful start the view execution. 
Should not appear, " + "'QueryFinish' = 2 — Successful end of the view execution, " + "'ExceptionBeforeStart' = 3 — Exception before the start of the view execution, " + "'ExceptionWhileProcessing' = 4 — Exception during the view execution."}, + {"exception_code", std::make_shared(), "Code of an exception."}, + {"exception", std::make_shared(), "Exception message."}, + {"stack_trace", std::make_shared(), "Stack trace. An empty string, if the query was completed successfully."} }; } diff --git a/src/Interpreters/S3QueueLog.cpp b/src/Interpreters/S3QueueLog.cpp index 3ed58de0f87..ba990a8ac25 100644 --- a/src/Interpreters/S3QueueLog.cpp +++ b/src/Interpreters/S3QueueLog.cpp @@ -25,19 +25,19 @@ ColumnsDescription S3QueueLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"uuid", std::make_shared()}, - {"file_name", std::make_shared()}, - {"rows_processed", std::make_shared()}, - {"status", status_datatype}, - {"processing_start_time", std::make_shared(std::make_shared())}, - {"processing_end_time", std::make_shared(std::make_shared())}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, - {"exception", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "Event date of writing this log row."}, + {"event_time", std::make_shared(), "Event time of writing this log row."}, + {"database", std::make_shared(), "The name of the database where the current S3Queue table lives."}, + {"table", std::make_shared(), "The name of the S3Queue table."}, + {"uuid", std::make_shared(), "The UUID of the S3Queue table."}, + {"file_name", std::make_shared(), "Name of the file being processed."}, + {"rows_processed", std::make_shared(), "Number of processed rows."}, + {"status", status_datatype, "Status of the file being processed."}, + {"processing_start_time", std::make_shared(std::make_shared()), "Time when processing of the file started."}, + {"processing_end_time", std::make_shared(std::make_shared()), "Time when processing of the file finished."}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected while loading this file."}, + {"exception", std::make_shared(), "Exception message, if one happened."}, }; } diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index a5bc5012292..dc0ac963d0b 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -121,33 +121,36 @@ ColumnsDescription SessionLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", lc_string_datatype}, - {"type", std::move(event_type)}, - {"auth_id", std::make_shared()}, - {"session_id", std::make_shared()}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, + {"hostname", lc_string_datatype, "Hostname of the server executing the query."}, + {"type", std::move(event_type), "Login/logout result. Possible values: " + "LoginFailure — Login error. " + "LoginSuccess — Successful login.
" + "Logout — Logout from the system."}, + {"auth_id", std::make_shared(), "Authentication ID, which is a UUID that is automatically generated each time a user logs in."}, + {"session_id", std::make_shared(), "Session ID that is passed by the client via the HTTP interface."}, + {"event_date", std::make_shared(), "Login/logout date."}, + {"event_time", std::make_shared(), "Login/logout time."}, + {"event_time_microseconds", std::make_shared(6), "Login/logout starting time with microseconds precision."}, - {"user", std::make_shared(std::make_shared())}, - {"auth_type", std::make_shared(std::move(identified_with_column))}, + {"user", std::make_shared(std::make_shared()), "User name."}, + {"auth_type", std::make_shared(std::move(identified_with_column)), "The authentication type."}, - {"profiles", std::make_shared(lc_string_datatype)}, - {"roles", std::make_shared(lc_string_datatype)}, - {"settings", std::move(settings_type_column)}, + {"profiles", std::make_shared(lc_string_datatype), "The list of profiles set for all roles and/or users."}, + {"roles", std::make_shared(lc_string_datatype), "The list of roles to which the profile is applied."}, + {"settings", std::move(settings_type_column), "Settings that were changed when the client logged in/out."}, - {"client_address", DataTypeFactory::instance().get("IPv6")}, - {"client_port", std::make_shared()}, - {"interface", std::move(interface_type_column)}, + {"client_address", DataTypeFactory::instance().get("IPv6"), "The IP address that was used to log in/out."}, + {"client_port", std::make_shared(), "The client port that was used to log in/out."}, + {"interface", std::move(interface_type_column), "The interface from which the login was initiated."}, - {"client_hostname", std::make_shared()}, - {"client_name", std::make_shared()}, - {"client_revision", std::make_shared()}, - {"client_version_major", std::make_shared()}, - {"client_version_minor", std::make_shared()}, - {"client_version_patch", std::make_shared()}, + {"client_hostname", std::make_shared(), "The hostname of the client machine where the clickhouse-client or another TCP client is run."}, + {"client_name", std::make_shared(), "The clickhouse-client or another TCP client name."}, + {"client_revision", std::make_shared(), "Revision of the clickhouse-client or another TCP client."}, + {"client_version_major", std::make_shared(), "The major version of the clickhouse-client or another TCP client."}, + {"client_version_minor", std::make_shared(), "The minor version of the clickhouse-client or another TCP client."}, + {"client_version_patch", std::make_shared(), "Patch component of the clickhouse-client or another TCP client version."}, - {"failure_reason", std::make_shared()}, + {"failure_reason", std::make_shared(), "The exception message containing the reason for the login/logout failure."}, }; } diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index 26adb0cfc3f..01bedf34f15 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -29,20 +29,27 @@ ColumnsDescription TraceLogElement::getColumnsDescription() { return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared()}, - {"event_time_microseconds", std::make_shared(6)}, - {"timestamp_ns", std::make_shared()}, - {"revision", std::make_shared()}, - {"trace_type", std::make_shared(trace_values)}, - {"thread_id", std::make_shared()}, - {"query_id", std::make_shared()}, - {"trace",
std::make_shared(std::make_shared())}, - {"size", std::make_shared()}, - {"ptr", std::make_shared()}, - {"event", std::make_shared(std::make_shared())}, - {"increment", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"event_date", std::make_shared(), "Date of the sampling moment."}, + {"event_time", std::make_shared(), "Timestamp of the sampling moment."}, + {"event_time_microseconds", std::make_shared(6), "Timestamp of the sampling moment with microseconds precision."}, + {"timestamp_ns", std::make_shared(), "Timestamp of the sampling moment in nanoseconds."}, + {"revision", std::make_shared(), "ClickHouse server build revision."}, + {"trace_type", std::make_shared(trace_values), "Trace type: " + "`Real` represents collecting stack traces by wall-clock time. " + "`CPU` represents collecting stack traces by CPU time. " + "`Memory` represents collecting allocations and deallocations when memory allocation exceeds the subsequent watermark. " + "`MemorySample` represents collecting random allocations and deallocations. " + "`MemoryPeak` represents collecting updates of peak memory usage. " + "`ProfileEvent` represents collecting increments of profile events." + }, + {"thread_id", std::make_shared(), "Thread identifier."}, + {"query_id", std::make_shared(), "Query identifier that can be used to get details about a query that was running from the query_log system table."}, + {"trace", std::make_shared(std::make_shared()), "Stack trace at the moment of sampling. Each element is a virtual memory address inside the ClickHouse server process."}, + {"size", std::make_shared(), "For trace types Memory, MemorySample or MemoryPeak, the amount of memory allocated; for other trace types, 0."}, + {"ptr", std::make_shared(), "The address of the allocated chunk."}, + {"event", std::make_shared(std::make_shared()), "For trace type ProfileEvent, the name of the updated profile event; for other trace types, an empty string."}, + {"increment", std::make_shared(), "For trace type ProfileEvent, the amount of the increment of the profile event; for other trace types, 0."}, }; } diff --git a/src/Interpreters/TransactionsInfoLog.cpp b/src/Interpreters/TransactionsInfoLog.cpp index 4a413439671..d13b31518d2 100644 --- a/src/Interpreters/TransactionsInfoLog.cpp +++ b/src/Interpreters/TransactionsInfoLog.cpp @@ -34,22 +34,22 @@ ColumnsDescription TransactionsInfoLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"type", std::move(type_enum)}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared(6)}, - {"thread_id", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "The hostname where the transaction was executed."}, + {"type", std::move(type_enum), "The type of the transaction. Possible values: Begin, Commit, Rollback, AddPart, LockPart, UnlockPart."}, + {"event_date", std::make_shared(), "Date of the entry."}, + {"event_time", std::make_shared(6), "Time of the entry."}, + {"thread_id", std::make_shared(), "The identifier of a thread."}, /// which thread?
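/// A minimal illustrative sketch of the pattern this patch applies across the system logs
/// (hypothetical column set; DataTypeDate, DataTypeLowCardinality and DataTypeString are the
/// usual ClickHouse DataType classes): each ColumnsDescription entry gains a third member,
/// a free-form comment that is stored in the table metadata and surfaces in system.columns:
///
///     ColumnsDescription
///     {
///         {"event_date", std::make_shared<DataTypeDate>(), "Date of the entry."},
///         {"hostname",
///          std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
///          "Hostname of the server executing the query."},
///     };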
- {"query_id", std::make_shared()}, - {"tid", getTransactionIDDataType()}, - {"tid_hash", std::make_shared()}, + {"query_id", std::make_shared(), "The ID of the query executed in the scope of the transaction."}, + {"tid", getTransactionIDDataType(), "The identifier of the transaction."}, + {"tid_hash", std::make_shared(), "The hash of the identifier."}, - {"csn", std::make_shared()}, + {"csn", std::make_shared(), "The Commit Sequence Number."}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"uuid", std::make_shared()}, - {"part", std::make_shared()}, + {"database", std::make_shared(), "The name of the database the transaction was executed against."}, + {"table", std::make_shared(), "The name of the table the transaction was executed against."}, + {"uuid", std::make_shared(), "The UUID of the table the transaction was executed against."}, + {"part", std::make_shared(), "The name of the part that participated in the transaction."}, // ? }; } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 960fad5dec6..5588fc55a64 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -990,8 +991,7 @@ void TreeRewriterResult::collectSourceColumns(bool add_special) { auto options = GetColumnsOptions(add_special ? GetColumnsOptions::All : GetColumnsOptions::AllPhysical); options.withExtendedObjects(); - if (storage->supportsSubcolumns()) - options.withSubcolumns(); + options.withSubcolumns(storage->supportsSubcolumns()); auto columns_from_storage = storage_snapshot->getColumns(options); @@ -1001,8 +1001,7 @@ void TreeRewriterResult::collectSourceColumns(bool add_special) source_columns.insert(source_columns.end(), columns_from_storage.begin(), columns_from_storage.end()); auto metadata_snapshot = storage->getInMemoryMetadataPtr(); - auto metadata_column_descriptions = metadata_snapshot->getColumns(); - source_columns_ordinary = metadata_column_descriptions.getOrdinary(); + source_columns_ordinary = metadata_snapshot->getColumns().getOrdinary(); } source_columns_set = removeDuplicateColumns(source_columns); @@ -1109,16 +1108,16 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select const auto & partition_desc = storage_snapshot->metadata->getPartitionKey(); if (partition_desc.expression) { - auto partition_source_columns = partition_desc.expression->getRequiredColumns(); - partition_source_columns.push_back("_part"); - partition_source_columns.push_back("_partition_id"); - partition_source_columns.push_back("_part_uuid"); - partition_source_columns.push_back("_partition_value"); + auto partition_columns = partition_desc.expression->getRequiredColumns(); + NameSet partition_columns_set(partition_columns.begin(), partition_columns.end()); + + const auto & partition_virtuals = MergeTreeData::virtuals_useful_for_filter; + partition_columns_set.insert(partition_virtuals.begin(), partition_virtuals.end()); + optimize_trivial_count = true; for (const auto & required_column : required) { - if (std::find(partition_source_columns.begin(), partition_source_columns.end(), required_column) - == partition_source_columns.end()) + if (!partition_columns_set.contains(required_column)) { optimize_trivial_count = false; break; @@ -1129,7 +1128,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select NameSet unknown_required_source_columns = required; - for (NamesAndTypesList::iterator it = source_columns.begin(); it !=
source_columns.end();) + for (auto it = source_columns.begin(); it != source_columns.end();) { const String & column_name = it->name; unknown_required_source_columns.erase(column_name); @@ -1143,32 +1142,23 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select has_virtual_shard_num = false; /// If there are virtual columns among the unknown columns. Remove them from the list of unknown and add /// in columns list, so that when further processing they are also considered. - if (storage) + if (storage_snapshot) { - const auto storage_virtuals = storage->getVirtuals(); + const auto & virtuals = storage_snapshot->virtual_columns; for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();) { - auto column = storage_virtuals.tryGetByName(*it); - if (column) + if (auto column = virtuals->tryGet(*it)) { source_columns.push_back(*column); it = unknown_required_source_columns.erase(it); } else - ++it; - } - - if (is_remote_storage) - { - for (const auto & name_type : storage_virtuals) { - if (name_type.name == "_shard_num" && storage->isVirtualColumn("_shard_num", storage_snapshot->getMetadataForQuery())) - { - has_virtual_shard_num = true; - break; - } + ++it; } } + + has_virtual_shard_num = is_remote_storage && storage->isVirtualColumn("_shard_num", storage_snapshot->getMetadataForQuery()) && virtuals->has("_shard_num"); } /// Collect missed object subcolumns diff --git a/src/Interpreters/ZooKeeperLog.cpp b/src/Interpreters/ZooKeeperLog.cpp index 9cc31edfe56..6f6d4568064 100644 --- a/src/Interpreters/ZooKeeperLog.cpp +++ b/src/Interpreters/ZooKeeperLog.cpp @@ -122,49 +122,49 @@ ColumnsDescription ZooKeeperLogElement::getColumnsDescription() return ColumnsDescription { - {"hostname", std::make_shared(std::make_shared())}, - {"type", std::move(type_enum)}, - {"event_date", std::make_shared()}, - {"event_time", std::make_shared(6)}, - {"thread_id", std::make_shared()}, - {"query_id", std::make_shared()}, - {"address", DataTypeFactory::instance().get("IPv6")}, - {"port", std::make_shared()}, - {"session_id", std::make_shared()}, - {"duration_ms", std::make_shared()}, + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, + {"type", std::move(type_enum), "Event type in the ZooKeeper client. Can have one of the following values: Request — The request has been sent, Response — The response was received, Finalize — The connection is lost, no response was received."}, + {"event_date", std::make_shared(), "The date when the event happened."}, + {"event_time", std::make_shared(6), "The date and time when the event happened."}, + {"thread_id", std::make_shared(), "The ID of the thread that executed this request."}, + {"query_id", std::make_shared(), "The ID of the query in the scope of which this request was executed."}, + {"address", DataTypeFactory::instance().get("IPv6"), "IP address of the ZooKeeper server that was used to make the request."}, + {"port", std::make_shared(), "The port of the ZooKeeper server that was used to make the request."}, + {"session_id", std::make_shared(), "The session ID that the ZooKeeper server sets for each connection."}, + {"duration_ms", std::make_shared(), "The time taken by ZooKeeper to execute the request."}, - {"xid", std::make_shared()}, - {"has_watch", std::make_shared()}, - {"op_num", op_num_enum}, - {"path", std::make_shared()}, + {"xid", std::make_shared(), "The ID of the request within the session. This is usually a sequential request number.
It is the same for the request row and the paired response/finalize row."}, + {"has_watch", std::make_shared(), "Whether the request has set a watch."}, + {"op_num", op_num_enum, "The type of request or response."}, + {"path", std::make_shared(), "The path to the ZooKeeper node specified in the request, or an empty string if the request does not require specifying a path."}, - {"data", std::make_shared()}, + {"data", std::make_shared(), "The data written to the ZooKeeper node (for the SET and CREATE requests — what the request wanted to write, for the response to the GET request — what was read) or an empty string."}, - {"is_ephemeral", std::make_shared()}, - {"is_sequential", std::make_shared()}, + {"is_ephemeral", std::make_shared(), "Whether the ZooKeeper node is being created as an ephemeral node."}, + {"is_sequential", std::make_shared(), "Whether the ZooKeeper node is being created as a sequential node."}, - {"version", std::make_shared(std::make_shared())}, + {"version", std::make_shared(std::make_shared()), "The version of the ZooKeeper node that the request expects when executing. This is supported for CHECK, SET and REMOVE requests (-1 if the request does not check the version, or NULL for other requests that do not support version checking)."}, - {"requests_size", std::make_shared()}, - {"request_idx", std::make_shared()}, + {"requests_size", std::make_shared(), "The number of requests included in the multi request (this is a special request that consists of several consecutive ordinary requests and executes them atomically). All requests included in a multi request will have the same xid."}, + {"request_idx", std::make_shared(), "The number of the request included in the multi request (0 for the multi request itself, then in order from 1)."}, - {"zxid", std::make_shared()}, - {"error", std::make_shared(error_enum)}, + {"zxid", std::make_shared(), "ZooKeeper transaction ID. The serial number issued by the ZooKeeper server in response to a successfully executed request (0 if the request was not executed/returned an error/the client does not know whether the request was executed)."}, + {"error", std::make_shared(error_enum), "Error code.
Can have many values; here are just some of them: ZOK — The request was executed successfully, ZCONNECTIONLOSS — The connection was lost, ZOPERATIONTIMEOUT — The request execution timeout has expired, ZSESSIONEXPIRED — The session has expired, NULL — The request is completed."}, - {"watch_type", std::make_shared(watch_type_enum)}, - {"watch_state", std::make_shared(watch_state_enum)}, + {"watch_type", std::make_shared(watch_type_enum), "The type of the watch event (for responses with op_num = Watch), for the remaining responses: NULL."}, + {"watch_state", std::make_shared(watch_state_enum), "The status of the watch event (for responses with op_num = Watch), for the remaining responses: NULL."}, - {"path_created", std::make_shared()}, + {"path_created", std::make_shared(), "The path to the created ZooKeeper node (for responses to the CREATE request); may differ from the path if the node is created as a sequential node."}, - {"stat_czxid", std::make_shared()}, - {"stat_mzxid", std::make_shared()}, - {"stat_pzxid", std::make_shared()}, - {"stat_version", std::make_shared()}, - {"stat_cversion", std::make_shared()}, - {"stat_dataLength", std::make_shared()}, - {"stat_numChildren", std::make_shared()}, + {"stat_czxid", std::make_shared(), "The zxid of the change that caused this ZooKeeper node to be created."}, + {"stat_mzxid", std::make_shared(), "The zxid of the change that last modified this ZooKeeper node."}, + {"stat_pzxid", std::make_shared(), "The transaction ID of the change that last modified children of this ZooKeeper node."}, + {"stat_version", std::make_shared(), "The number of changes to the data of this ZooKeeper node."}, + {"stat_cversion", std::make_shared(), "The number of changes to the children of this ZooKeeper node."}, + {"stat_dataLength", std::make_shared(), "The length of the data field of this ZooKeeper node."}, + {"stat_numChildren", std::make_shared(), "The number of children of this ZooKeeper node."}, - {"children", std::make_shared(std::make_shared())}, + {"children", std::make_shared(std::make_shared()), "The list of child ZooKeeper nodes (for responses to the LIST request)."}, }; } diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 6122ec6180a..df8236c11f4 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -252,7 +252,8 @@ BlockIO getDistributedDDLStatus(const String & node_path, const DDLLogEntry & en auto source = std::make_shared(node_path, entry, context, hosts_to_wait); io.pipeline = QueryPipeline(std::move(source)); - if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE) + if (context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE || + context->getSettingsRef().distributed_ddl_output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE) io.pipeline.complete(std::make_shared(io.pipeline.getHeader())); return io; @@ -264,7 +265,9 @@ Block DDLQueryStatusSource::getSampleBlock(ContextPtr context_, bool hosts_to_wa auto maybe_make_nullable = [&](const DataTypePtr & type) -> DataTypePtr { - if (output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE) + if (output_mode == DistributedDDLOutputMode::THROW || + output_mode == DistributedDDLOutputMode::NONE || + output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE) return type; return std::make_shared(type); }; @@ -313,14 +316,15 @@ DDLQueryStatusSource::DDLQueryStatusSource( { auto
output_mode = context->getSettingsRef().distributed_ddl_output_mode; throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE - || output_mode == DistributedDDLOutputMode::NONE; + || output_mode == DistributedDDLOutputMode::NONE || output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE; if (hosts_to_wait) { waiting_hosts = NameSet(hosts_to_wait->begin(), hosts_to_wait->end()); is_replicated_database = true; only_running_hosts = output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE || - output_mode == DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE; + output_mode == DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE || + output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE; } else { @@ -442,14 +446,16 @@ Chunk DDLQueryStatusSource::generate() size_t num_unfinished_hosts = waiting_hosts.size() - num_hosts_finished; size_t num_active_hosts = current_active_hosts.size(); - constexpr auto msg_format = "Watching task {} is executing longer than distributed_ddl_task_timeout (={}) seconds. " - "There are {} unfinished hosts ({} of them are currently executing the task), " - "they are going to execute the query in background"; + constexpr auto msg_format = "Distributed DDL task {} is not finished on {} of {} hosts " + "({} of them are currently executing the task, {} are inactive). " + "They are going to execute the query in background. Was waiting for {} seconds{}"; + if (throw_on_timeout) { if (!first_exception) first_exception = std::make_unique(Exception(ErrorCodes::TIMEOUT_EXCEEDED, - msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts)); + msg_format, node_path, num_unfinished_hosts, waiting_hosts.size(), num_active_hosts, offline_hosts.size(), + watch.elapsedSeconds(), stop_waiting_offline_hosts ? "" : ", which is longer than distributed_ddl_task_timeout")); /// For Replicated database print a list of unfinished hosts as well. Will return empty block on next iteration. if (is_replicated_database) @@ -457,7 +463,8 @@ Chunk DDLQueryStatusSource::generate() return {}; } - LOG_INFO(log, msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts); + LOG_INFO(log, msg_format, node_path, num_unfinished_hosts, waiting_hosts.size(), num_active_hosts, offline_hosts.size(), + watch.elapsedSeconds(), stop_waiting_offline_hosts ? "" : ", which is longer than distributed_ddl_task_timeout"); return generateChunkWithUnfinishedHosts(); } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index f318f363eda..88021038ebb 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -104,6 +104,7 @@ namespace ErrorCodes extern const int QUERY_WAS_CANCELLED; extern const int INCORRECT_DATA; extern const int SYNTAX_ERROR; + extern const int SUPPORT_IS_DISABLED; extern const int INCORRECT_QUERY; } @@ -1023,6 +1024,21 @@ static std::tuple executeQueryImpl( if (settings.implicit_transaction && settings.throw_on_unsupported_query_inside_transaction) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Async inserts with 'implicit_transaction' are not supported"); + /// Let's agree on terminology and say that a mini-INSERT is an asynchronous INSERT + /// which typically contains not a lot of data inside and a big-INSERT is an INSERT + /// which was formed by concatenating several mini-INSERTs together.
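/// (An illustrative example of the terminology above, not part of the patch: if mini-INSERTs
/// A, B and C arrive close together, one flush may glue them into a single big-INSERT [A, B, C],
/// while a retry may produce [A, B] and [C]: the same rows, but different blocks.)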
+ /// In case when the client had to retry some mini-INSERTs then they will be properly deduplicated + /// by the source tables. This functionality is controlled by a setting `async_insert_deduplicate`. + /// But then they will be glued together into a block and pushed through a chain of Materialized Views if any. + /// The process of forming such blocks is not deterministic, so each time we retry mini-INSERTs the resulting + /// block may be concatenated differently. + /// That's why deduplication in dependent Materialized Views doesn't make sense in presence of async INSERTs. + if (settings.throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert && + settings.deduplicate_blocks_in_dependent_materialized_views) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Deduplication in dependent materialized views cannot work together with async inserts. " + "Please disable either the `deduplicate_blocks_in_dependent_materialized_views` or the `async_insert` setting."); + quota = context->getQuota(); if (quota) { diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index 75b0e710fbe..06c5d424d2f 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -142,8 +142,9 @@ Block getHeaderForProcessingStage( if (context->getSettingsRef().allow_experimental_analyzer) { - auto storage = std::make_shared( - storage_snapshot->storage.getStorageID(), storage_snapshot->metadata->getColumns(), storage_snapshot); + auto storage = std::make_shared(storage_snapshot->storage.getStorageID(), + storage_snapshot->getAllColumnsDescription(), + storage_snapshot); InterpreterSelectQueryAnalyzer interpreter(query, context, storage, SelectQueryOptions(processed_stage).analyze()); result = interpreter.getSampleBlock(); } diff --git a/src/Interpreters/getTableExpressions.cpp b/src/Interpreters/getTableExpressions.cpp index 70e38526648..2853be4c05e 100644 --- a/src/Interpreters/getTableExpressions.cpp +++ b/src/Interpreters/getTableExpressions.cpp @@ -99,7 +99,7 @@ static NamesAndTypesList getColumnsFromTableExpression( names_and_type_list = columns.getOrdinary(); materialized = columns.getMaterialized(); aliases = columns.getAliases(); - virtuals = function_storage->getVirtuals(); + virtuals = function_storage->getVirtualsList(); } else if (table_expression.database_and_table_name) { @@ -110,7 +110,7 @@ static NamesAndTypesList getColumnsFromTableExpression( names_and_type_list = columns.getOrdinary(); materialized = columns.getMaterialized(); aliases = columns.getAliases(); - virtuals = table->getVirtuals(); + virtuals = table->getVirtualsList(); } return names_and_type_list; diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index fd8f5b154c4..239cce5b427 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -20,7 +20,6 @@ #include #include #include -#include namespace DB @@ -280,7 +279,7 @@ void fillMissingColumns( const NamesAndTypesList & requested_columns, const NamesAndTypesList & available_columns, const NameSet & partially_read_columns, - StorageMetadataPtr metadata_snapshot, size_t block_number) + StorageMetadataPtr metadata_snapshot) { size_t num_columns = requested_columns.size(); if (num_columns != res_columns.size()) @@ -359,14 +358,9 @@ void fillMissingColumns( } else { - if (requested_column->name == BlockNumberColumn::name) - res_columns[i] =
type->createColumnConst(num_rows, block_number)->convertToFullColumnIfConst(); - else - /// We must turn a constant column into a full column because the interpreter could infer - /// that it is constant everywhere but in some blocks (from other parts) it can be a full column. - res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst(); - - + /// We must turn a constant column into a full column because the interpreter could infer + /// that it is constant everywhere but in some blocks (from other parts) it can be a full column. + res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst(); } } } diff --git a/src/Interpreters/inplaceBlockConversions.h b/src/Interpreters/inplaceBlockConversions.h index 7a13a75ec8b..bea44bf6db9 100644 --- a/src/Interpreters/inplaceBlockConversions.h +++ b/src/Interpreters/inplaceBlockConversions.h @@ -46,6 +46,6 @@ void fillMissingColumns( const NamesAndTypesList & requested_columns, const NamesAndTypesList & available_columns, const NameSet & partially_read_columns, - StorageMetadataPtr metadata_snapshot, size_t block_number = 0); + StorageMetadataPtr metadata_snapshot); } diff --git a/src/Interpreters/processColumnTransformers.cpp b/src/Interpreters/processColumnTransformers.cpp index 2a704d4a937..5ef331eb119 100644 --- a/src/Interpreters/processColumnTransformers.cpp +++ b/src/Interpreters/processColumnTransformers.cpp @@ -32,7 +32,7 @@ ASTPtr processColumnTransformers( tables_with_columns[0].addHiddenColumns(columns.getMaterialized()); tables_with_columns[0].addHiddenColumns(columns.getAliases()); - tables_with_columns[0].addHiddenColumns(table->getVirtuals()); + tables_with_columns[0].addHiddenColumns(table->getVirtualsList()); NameSet source_columns_set; for (const auto & identifier : query_columns->children) diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index a93ad1d1746..f104e715452 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -634,6 +634,7 @@ void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } else if (alter_object == AlterObjectType::DATABASE && database) diff --git a/src/Parsers/ASTCheckQuery.h b/src/Parsers/ASTCheckQuery.h index 5e9032e77fb..eca08b2b094 100644 --- a/src/Parsers/ASTCheckQuery.h +++ b/src/Parsers/ASTCheckQuery.h @@ -53,6 +53,7 @@ protected: settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTCreateIndexQuery.cpp b/src/Parsers/ASTCreateIndexQuery.cpp index 84493ff22f1..f6def3ed85c 100644 --- a/src/Parsers/ASTCreateIndexQuery.cpp +++ b/src/Parsers/ASTCreateIndexQuery.cpp @@ -52,6 +52,7 @@ void ASTCreateIndexQuery::formatQueryImpl(const FormatSettings & settings, Forma settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 1315ea5784c..de5eb40837f 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -337,6 +337,7 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); if (uuid != UUIDHelpers::Nil) @@ -370,6 +371,7 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat settings.ostr << '.'; } + chassert(table); 
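/// (A note on the chassert calls added throughout this patch: chassert is ClickHouse's
/// debug-build assertion, expected to abort in debug/sanitizer builds and to compile to a
/// no-op in release builds. Here it documents the invariant that `table` is non-null on this
/// formatting path instead of silently dereferencing a null pointer.)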
table->formatImpl(settings, state, frame); if (uuid != UUIDHelpers::Nil) diff --git a/src/Parsers/ASTDeleteQuery.cpp b/src/Parsers/ASTDeleteQuery.cpp index 1e8303dac62..67f3a85c9a5 100644 --- a/src/Parsers/ASTDeleteQuery.cpp +++ b/src/Parsers/ASTDeleteQuery.cpp @@ -40,6 +40,7 @@ void ASTDeleteQuery::formatQueryImpl(const FormatSettings & settings, FormatStat settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); formatOnCluster(settings); diff --git a/src/Parsers/ASTDropIndexQuery.cpp b/src/Parsers/ASTDropIndexQuery.cpp index 1a1481ea27f..1109f32f019 100644 --- a/src/Parsers/ASTDropIndexQuery.cpp +++ b/src/Parsers/ASTDropIndexQuery.cpp @@ -47,6 +47,7 @@ void ASTDropIndexQuery::formatQueryImpl(const FormatSettings & settings, FormatS settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTDropQuery.cpp b/src/Parsers/ASTDropQuery.cpp index f41f28f745d..ca47ceccb85 100644 --- a/src/Parsers/ASTDropQuery.cpp +++ b/src/Parsers/ASTDropQuery.cpp @@ -76,6 +76,7 @@ void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 69a2e7c9ada..07eea86ef81 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -36,508 +37,6 @@ namespace ErrorCodes } -namespace -{ - /// Finds arguments of a specified function which should not be displayed for most users for security reasons. - /// That involves passwords and secret keys. - class FunctionSecretArgumentsFinder - { - public: - explicit FunctionSecretArgumentsFinder(const ASTFunction & function_) : function(function_) - { - if (!function.arguments) - return; - - const auto * expr_list = function.arguments->as(); - if (!expr_list) - return; - - arguments = &expr_list->children; - switch (function.kind) - { - case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break; - case ASTFunction::Kind::WINDOW_FUNCTION: break; - case ASTFunction::Kind::LAMBDA_FUNCTION: break; - case ASTFunction::Kind::TABLE_ENGINE: findTableEngineSecretArguments(); break; - case ASTFunction::Kind::DATABASE_ENGINE: findDatabaseEngineSecretArguments(); break; - case ASTFunction::Kind::BACKUP_NAME: findBackupNameSecretArguments(); break; - } - } - - struct Result - { - /// Result constructed by default means no arguments will be hidden. - size_t start = static_cast(-1); - size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`). - /// In all known cases secret arguments are consecutive - bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments. - /// E.g. 
"headers" in `url('..', headers('foo' = '[HIDDEN]'))` - std::vector nested_maps; - - bool hasSecrets() const - { - return count != 0 || !nested_maps.empty(); - } - }; - - Result getResult() const { return result; } - - private: - const ASTFunction & function; - const ASTs * arguments = nullptr; - Result result; - - void markSecretArgument(size_t index, bool argument_is_named = false) - { - if (index >= arguments->size()) - return; - if (!result.count) - { - result.start = index; - result.are_named = argument_is_named; - } - chassert(index >= result.start); /// We always check arguments consecutively - result.count = index + 1 - result.start; - if (!argument_is_named) - result.are_named = false; - } - - void findOrdinaryFunctionSecretArguments() - { - if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb")) - { - /// mysql('host:port', 'database', 'table', 'user', 'password', ...) - /// postgresql('host:port', 'database', 'table', 'user', 'password', ...) - /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...) - findMySQLFunctionSecretArguments(); - } - else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") || - (function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg")) - { - /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) - findS3FunctionSecretArguments(/* is_cluster_function= */ false); - } - else if (function.name == "s3Cluster") - { - /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) - findS3FunctionSecretArguments(/* is_cluster_function= */ true); - } - else if ((function.name == "remote") || (function.name == "remoteSecure")) - { - /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) - findRemoteFunctionSecretArguments(); - } - else if ((function.name == "encrypt") || (function.name == "decrypt") || - (function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") || - (function.name == "tryDecrypt")) - { - /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) - findEncryptionFunctionSecretArguments(); - } - else if (function.name == "url") - { - findURLSecretArguments(); - } - } - - void findMySQLFunctionSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// mysql(named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 1); - } - else - { - /// mysql('host:port', 'database', 'table', 'user', 'password', ...) - markSecretArgument(4); - } - } - - /// Returns the number of arguments excluding "headers" and "extra_credentials" (which should - /// always be at the end). Marks "headers" as secret, if found. - size_t excludeS3OrURLNestedMaps() - { - size_t count = arguments->size(); - while (count > 0) - { - const ASTFunction * f = arguments->at(count - 1)->as(); - if (!f) - break; - if (f->name == "headers") - result.nested_maps.push_back(f->name); - else if (f->name != "extra_credentials") - break; - count -= 1; - } - return count; - } - - void findS3FunctionSecretArguments(bool is_cluster_function) - { - /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument. - size_t url_arg_idx = is_cluster_function ? 1 : 0; - - if (!is_cluster_function && isNamedCollectionName(0)) - { - /// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...) 
- findSecretNamedArgument("secret_access_key", 1); - return; - } - - /// We should check other arguments first because we don't need to do any replacement in case of - /// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) - /// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) - size_t count = excludeS3OrURLNestedMaps(); - if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4)) - { - String second_arg; - if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg)) - { - if (boost::iequals(second_arg, "NOSIGN")) - return; /// The argument after 'url' is "NOSIGN". - - if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) - return; /// The argument after 'url' is a format: s3('url', 'format', ...) - } - } - - /// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: - /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) - /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') - if (url_arg_idx + 2 < count) - markSecretArgument(url_arg_idx + 2); - } - - void findURLSecretArguments() - { - if (!isNamedCollectionName(0)) - excludeS3OrURLNestedMaps(); - } - - bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const - { - if (arg_idx >= arguments->size()) - return false; - - return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier); - } - - static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true) - { - if (const auto * literal = argument.as()) - { - if (literal->value.getType() != Field::Types::String) - return false; - if (res) - *res = literal->value.safeGet(); - return true; - } - - if (allow_identifier) - { - if (const auto * id = argument.as()) - { - if (res) - *res = id->name(); - return true; - } - } - - return false; - } - - void findRemoteFunctionSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// remote(named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 1); - return; - } - - /// We're going to replace 'password' with '[HIDDEN'] for the following signatures: - /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key]) - /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key]) - /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key]) - - /// But we should check the number of arguments first because we don't need to do any replacements in case of - /// remote('addresses_expr', db.table) - if (arguments->size() < 3) - return; - - size_t arg_num = 1; - - /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'. - const auto * table_function = (*arguments)[arg_num]->as(); - if (table_function && KnownTableFunctionNames::instance().exists(table_function->name)) - { - ++arg_num; - } - else - { - std::optional database; - std::optional qualified_table_name; - if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name)) - { - /// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'. - /// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user' - /// before the argument 'password'. So it's safer to wipe two arguments just in case. 
- /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string - /// before wiping it (because the `password` argument is always a literal string). - if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false)) - { - /// Wipe either `password` or `user`. - markSecretArgument(arg_num + 2); - } - if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false)) - { - /// Wipe either `password` or `sharding_key`. - markSecretArgument(arg_num + 3); - } - return; - } - - /// Skip the current argument (which is either a database name or a qualified table name). - ++arg_num; - if (database) - { - /// Skip the 'table' argument if the previous argument was a database name. - ++arg_num; - } - } - - /// Skip username. - ++arg_num; - - /// Do our replacement: - /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...) - /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string - /// before wiping it (because the `password` argument is always a literal string). - bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false); - if (can_be_password) - markSecretArgument(arg_num); - } - - /// Tries to get either a database name or a qualified table name from an argument. - /// Empty string is also allowed (it means the default database). - /// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password. - bool tryGetDatabaseNameOrQualifiedTableName( - size_t arg_idx, - std::optional & res_database, - std::optional & res_qualified_table_name) const - { - res_database.reset(); - res_qualified_table_name.reset(); - - String str; - if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true)) - return false; - - if (str.empty()) - { - res_database = ""; - return true; - } - - auto qualified_table_name = QualifiedTableName::tryParseFromString(str); - if (!qualified_table_name) - return false; - - if (qualified_table_name->database.empty()) - res_database = std::move(qualified_table_name->table); - else - res_qualified_table_name = std::move(qualified_table_name); - return true; - } - - void findEncryptionFunctionSecretArguments() - { - if (arguments->empty()) - return; - - /// We replace all arguments after 'mode' with '[HIDDEN]': - /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]') - result.start = 1; - result.count = arguments->size() - 1; - } - - void findTableEngineSecretArguments() - { - const String & engine_name = function.name; - if (engine_name == "ExternalDistributed") - { - /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') - findExternalDistributedTableEngineSecretArguments(); - } - else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") || - (engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB")) - { - /// MySQL('host:port', 'database', 'table', 'user', 'password', ...) - /// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) - /// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) - /// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...) 
- findMySQLFunctionSecretArguments(); - } - else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") || - (engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue")) - { - /// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...) - findS3TableEngineSecretArguments(); - } - else if (engine_name == "URL") - { - findURLSecretArguments(); - } - } - - void findExternalDistributedTableEngineSecretArguments() - { - if (isNamedCollectionName(1)) - { - /// ExternalDistributed('engine', named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 2); - } - else - { - /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') - markSecretArgument(5); - } - } - - void findS3TableEngineSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// S3(named_collection, ..., secret_access_key = 'secret_access_key') - findSecretNamedArgument("secret_access_key", 1); - return; - } - - /// We should check other arguments first because we don't need to do any replacement in case of - /// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) - /// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)]) - size_t count = excludeS3OrURLNestedMaps(); - if ((3 <= count) && (count <= 4)) - { - String second_arg; - if (tryGetStringFromArgument(1, &second_arg)) - { - if (boost::iequals(second_arg, "NOSIGN")) - return; /// The argument after 'url' is "NOSIGN". - - if (count == 3) - { - if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) - return; /// The argument after 'url' is a format: S3('url', 'format', ...) - } - } - } - - /// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') - if (2 < count) - markSecretArgument(2); - } - - void findDatabaseEngineSecretArguments() - { - const String & engine_name = function.name; - if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") || - (engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") || - (engine_name == "MaterializedPostgreSQL")) - { - /// MySQL('host:port', 'database', 'user', 'password') - /// PostgreSQL('host:port', 'database', 'user', 'password') - findMySQLDatabaseSecretArguments(); - } - else if (engine_name == "S3") - { - /// S3('url', 'access_key_id', 'secret_access_key') - findS3DatabaseSecretArguments(); - } - } - - void findMySQLDatabaseSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// MySQL(named_collection, ..., password = 'password', ...) - findSecretNamedArgument("password", 1); - } - else - { - /// MySQL('host:port', 'database', 'user', 'password') - markSecretArgument(3); - } - } - - void findS3DatabaseSecretArguments() - { - if (isNamedCollectionName(0)) - { - /// S3(named_collection, ..., secret_access_key = 'password', ...) - findSecretNamedArgument("secret_access_key", 1); - } - else - { - /// S3('url', 'access_key_id', 'secret_access_key') - markSecretArgument(2); - } - } - - void findBackupNameSecretArguments() - { - const String & engine_name = function.name; - if (engine_name == "S3") - { - /// BACKUP ... 
TO S3(url, [aws_access_key_id, aws_secret_access_key]) - markSecretArgument(2); - } - } - - /// Whether a specified argument can be the name of a named collection? - bool isNamedCollectionName(size_t arg_idx) const - { - if (arguments->size() <= arg_idx) - return false; - - const auto * identifier = (*arguments)[arg_idx]->as(); - return identifier != nullptr; - } - - /// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified. - void findSecretNamedArgument(const std::string_view & key, size_t start = 0) - { - for (size_t i = start; i < arguments->size(); ++i) - { - const auto & argument = (*arguments)[i]; - const auto * equals_func = argument->as(); - if (!equals_func || (equals_func->name != "equals")) - continue; - - const auto * expr_list = equals_func->arguments->as(); - if (!expr_list) - continue; - - const auto & equal_args = expr_list->children; - if (equal_args.size() != 2) - continue; - - String found_key; - if (!tryGetStringFromArgument(*equal_args[0], &found_key)) - continue; - - if (found_key == key) - markSecretArgument(i, /* argument_is_named= */ true); - } - } - }; -} - - void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const { /// These functions contain some unexpected ASTs in arguments (e.g. SETTINGS or even a SELECT query) @@ -1195,7 +694,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format FunctionSecretArgumentsFinder::Result secret_arguments; if (!settings.show_secrets) - secret_arguments = FunctionSecretArgumentsFinder{*this}.getResult(); + secret_arguments = FunctionSecretArgumentsFinderAST(*this).getResult(); for (size_t i = 0, size = arguments->children.size(); i < size; ++i) { @@ -1260,7 +759,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format bool ASTFunction::hasSecretParts() const { - return (FunctionSecretArgumentsFinder{*this}.getResult().hasSecrets()) || childrenHaveSecretParts(); + return (FunctionSecretArgumentsFinderAST(*this).getResult().hasSecrets()) || childrenHaveSecretParts(); } String getFunctionName(const IAST * ast) diff --git a/src/Parsers/ASTInsertQuery.cpp b/src/Parsers/ASTInsertQuery.cpp index eb4bcce6578..72a569fe047 100644 --- a/src/Parsers/ASTInsertQuery.cpp +++ b/src/Parsers/ASTInsertQuery.cpp @@ -74,6 +74,7 @@ void ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTOptimizeQuery.cpp b/src/Parsers/ASTOptimizeQuery.cpp index d9406a9eca0..397a37586fc 100644 --- a/src/Parsers/ASTOptimizeQuery.cpp +++ b/src/Parsers/ASTOptimizeQuery.cpp @@ -15,6 +15,7 @@ void ASTOptimizeQuery::formatQueryImpl(const FormatSettings & settings, FormatSt settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); formatOnCluster(settings); diff --git a/src/Parsers/ASTQueryWithTableAndOutput.h b/src/Parsers/ASTQueryWithTableAndOutput.h index 1b8621fb63b..29dbd30e54b 100644 --- a/src/Parsers/ASTQueryWithTableAndOutput.h +++ b/src/Parsers/ASTQueryWithTableAndOutput.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -61,6 +62,7 @@ protected: settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } }; diff --git a/src/Parsers/ASTRenameQuery.h b/src/Parsers/ASTRenameQuery.h index 901f0121a28..73d12be094a 100644 --- a/src/Parsers/ASTRenameQuery.h +++ b/src/Parsers/ASTRenameQuery.h @@ -127,6 +127,7 @@ 
protected: settings.ostr << '.'; } + chassert(it->from.table); it->from.table->formatImpl(settings, state, frame); settings.ostr << (settings.hilite ? hilite_keyword : "") << (exchange ? " AND " : " TO ") << (settings.hilite ? hilite_none : ""); @@ -137,6 +138,7 @@ protected: settings.ostr << '.'; } + chassert(it->to.table); it->to.table->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 63311a70e42..e2ebaee8438 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -114,6 +114,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); return settings.ostr; }; diff --git a/src/Parsers/ASTUndropQuery.cpp b/src/Parsers/ASTUndropQuery.cpp index ab96ca4711e..7212e264c0e 100644 --- a/src/Parsers/ASTUndropQuery.cpp +++ b/src/Parsers/ASTUndropQuery.cpp @@ -36,6 +36,7 @@ void ASTUndropQuery::formatQueryImpl(const FormatSettings & settings, FormatStat settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTWatchQuery.h b/src/Parsers/ASTWatchQuery.h index 80b675f513f..a5b76c07605 100644 --- a/src/Parsers/ASTWatchQuery.h +++ b/src/Parsers/ASTWatchQuery.h @@ -52,6 +52,7 @@ protected: settings.ostr << '.'; } + chassert(table); table->formatImpl(settings, state, frame); if (is_watch_events) diff --git a/src/Parsers/FunctionSecretArgumentsFinder.h b/src/Parsers/FunctionSecretArgumentsFinder.h new file mode 100644 index 00000000000..002ad94f6ea --- /dev/null +++ b/src/Parsers/FunctionSecretArgumentsFinder.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +namespace DB +{ + +class FunctionSecretArgumentsFinder +{ +public: + struct Result + { + /// Result constructed by default means no arguments will be hidden. + size_t start = static_cast(-1); + size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`). + /// In all known cases secret arguments are consecutive + bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments. + /// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))` + std::vector nested_maps; + + bool hasSecrets() const + { + return count != 0 || !nested_maps.empty(); + } + }; +}; + +} diff --git a/src/Parsers/FunctionSecretArgumentsFinderAST.h b/src/Parsers/FunctionSecretArgumentsFinderAST.h new file mode 100644 index 00000000000..348b2ca9e3a --- /dev/null +++ b/src/Parsers/FunctionSecretArgumentsFinderAST.h @@ -0,0 +1,499 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + + +/// Finds arguments of a specified function which should not be displayed for most users for security reasons. +/// That involves passwords and secret keys. 
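/// A rough usage sketch (hypothetical call site; `ast_function` stands for any ASTFunction
/// being formatted with show_secrets disabled):
///
///     FunctionSecretArgumentsFinderAST finder(ast_function);
///     FunctionSecretArgumentsFinder::Result res = finder.getResult();
///     if (res.hasSecrets())
///     {
///         /// render arguments [res.start, res.start + res.count) as '[HIDDEN]'
///     }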
+class FunctionSecretArgumentsFinderAST +{ +public: + explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_) : function(function_) + { + if (!function.arguments) + return; + + const auto * expr_list = function.arguments->as(); + if (!expr_list) + return; + + arguments = &expr_list->children; + switch (function.kind) + { + case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break; + case ASTFunction::Kind::WINDOW_FUNCTION: break; + case ASTFunction::Kind::LAMBDA_FUNCTION: break; + case ASTFunction::Kind::TABLE_ENGINE: findTableEngineSecretArguments(); break; + case ASTFunction::Kind::DATABASE_ENGINE: findDatabaseEngineSecretArguments(); break; + case ASTFunction::Kind::BACKUP_NAME: findBackupNameSecretArguments(); break; + } + } + + FunctionSecretArgumentsFinder::Result getResult() const { return result; } + +private: + const ASTFunction & function; + const ASTs * arguments = nullptr; + FunctionSecretArgumentsFinder::Result result; + + void markSecretArgument(size_t index, bool argument_is_named = false) + { + if (index >= arguments->size()) + return; + if (!result.count) + { + result.start = index; + result.are_named = argument_is_named; + } + chassert(index >= result.start); /// We always check arguments consecutively + result.count = index + 1 - result.start; + if (!argument_is_named) + result.are_named = false; + } + + void findOrdinaryFunctionSecretArguments() + { + if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb")) + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + /// postgresql('host:port', 'database', 'table', 'user', 'password', ...) + /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...) + findMySQLFunctionSecretArguments(); + } + else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") || + (function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg")) + { + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) + findS3FunctionSecretArguments(/* is_cluster_function= */ false); + } + else if (function.name == "s3Cluster") + { + /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) + findS3FunctionSecretArguments(/* is_cluster_function= */ true); + } + else if ((function.name == "remote") || (function.name == "remoteSecure")) + { + /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) + findRemoteFunctionSecretArguments(); + } + else if ((function.name == "encrypt") || (function.name == "decrypt") || + (function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") || + (function.name == "tryDecrypt")) + { + /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) + findEncryptionFunctionSecretArguments(); + } + else if (function.name == "url") + { + findURLSecretArguments(); + } + } + + void findMySQLFunctionSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// mysql(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + } + else + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + markSecretArgument(4); + } + } + + /// Returns the number of arguments excluding "headers" and "extra_credentials" (which should + /// always be at the end). Marks "headers" as secret, if found. 
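/// (A worked example for the function below: given
///     s3('url', 'key_id', 'secret', extra_credentials('role' = 'r'), headers('X' = 'y'))
/// the trailing headers(..) and extra_credentials(..) functions are peeled off from the right,
/// "headers" is recorded in result.nested_maps, and the returned count of the remaining
/// positional arguments is 3.)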
+ size_t excludeS3OrURLNestedMaps() + { + size_t count = arguments->size(); + while (count > 0) + { + const ASTFunction * f = arguments->at(count - 1)->as<ASTFunction>(); + if (!f) + break; + if (f->name == "headers") + result.nested_maps.push_back(f->name); + else if (f->name != "extra_credentials") + break; + count -= 1; + } + return count; + } + + void findS3FunctionSecretArguments(bool is_cluster_function) + { + /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument. + size_t url_arg_idx = is_cluster_function ? 1 : 0; + + if (!is_cluster_function && isNamedCollectionName(0)) + { + /// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...) + findSecretNamedArgument("secret_access_key", 1); + return; + } + + /// We should check other arguments first because we don't need to do any replacement in case of + /// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + /// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + size_t count = excludeS3OrURLNestedMaps(); + if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4)) + { + String second_arg; + if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg)) + { + if (boost::iequals(second_arg, "NOSIGN")) + return; /// The argument after 'url' is "NOSIGN". + + if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + return; /// The argument after 'url' is a format: s3('url', 'format', ...) + } + } + + /// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: + /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) + /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + if (url_arg_idx + 2 < count) + markSecretArgument(url_arg_idx + 2); + } + + void findURLSecretArguments() + { + if (!isNamedCollectionName(0)) + excludeS3OrURLNestedMaps(); + } + + bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const + { + if (arg_idx >= arguments->size()) + return false; + + return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier); + } + + static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true) + { + if (const auto * literal = argument.as<ASTLiteral>()) + { + if (literal->value.getType() != Field::Types::String) + return false; + if (res) + *res = literal->value.safeGet<String>(); + return true; + } + + if (allow_identifier) + { + if (const auto * id = argument.as<ASTIdentifier>()) + { + if (res) + *res = id->name(); + return true; + } + } + + return false; + } + + void findRemoteFunctionSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// remote(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + return; + } + + /// We're going to replace 'password' with '[HIDDEN]' for the following signatures: + /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key]) + /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key]) + /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key]) + + /// But we should check the number of arguments first because we don't need to do any replacements in case of + /// remote('addresses_expr', db.table) + if (arguments->size() < 3) + return; + + size_t arg_num = 1; + + /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
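+ /// E.g. remote('addresses_expr', mydb.table, 'user', 'password') skips one argument here, while remote('addresses_expr', 'mydb', 'table', 'user', 'password') skips two.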
+ const auto * table_function = (*arguments)[arg_num]->as<ASTFunction>(); + if (table_function && KnownTableFunctionNames::instance().exists(table_function->name)) + { + ++arg_num; + } + else + { + std::optional<String> database; + std::optional<QualifiedTableName> qualified_table_name; + if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name)) + { + /// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'. + /// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user' + /// before the argument 'password'. So it's safer to wipe two arguments just in case. + /// The last argument can also be a `sharding_key`, so we need to check that argument is a literal string + /// before wiping it (because the `password` argument is always a literal string). + if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false)) + { + /// Wipe either `password` or `user`. + markSecretArgument(arg_num + 2); + } + if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false)) + { + /// Wipe either `password` or `sharding_key`. + markSecretArgument(arg_num + 3); + } + return; + } + + /// Skip the current argument (which is either a database name or a qualified table name). + ++arg_num; + if (database) + { + /// Skip the 'table' argument if the previous argument was a database name. + ++arg_num; + } + } + + /// Skip username. + ++arg_num; + + /// Do our replacement: + /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...) + /// The last argument can also be a `sharding_key`, so we need to check that argument is a literal string + /// before wiping it (because the `password` argument is always a literal string). + bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false); + if (can_be_password) + markSecretArgument(arg_num); + } + + /// Tries to get either a database name or a qualified table name from an argument. + /// Empty string is also allowed (it means the default database). + /// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
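+ /// E.g. 'mydb' sets res_database, 'mydb.table' sets res_qualified_table_name, and an empty string sets res_database to '' (the default database).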
+ bool tryGetDatabaseNameOrQualifiedTableName( + size_t arg_idx, + std::optional<String> & res_database, + std::optional<QualifiedTableName> & res_qualified_table_name) const + { + res_database.reset(); + res_qualified_table_name.reset(); + + String str; + if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true)) + return false; + + if (str.empty()) + { + res_database = ""; + return true; + } + + auto qualified_table_name = QualifiedTableName::tryParseFromString(str); + if (!qualified_table_name) + return false; + + if (qualified_table_name->database.empty()) + res_database = std::move(qualified_table_name->table); + else + res_qualified_table_name = std::move(qualified_table_name); + return true; + } + + void findEncryptionFunctionSecretArguments() + { + if (arguments->empty()) + return; + + /// We replace all arguments after 'mode' with '[HIDDEN]': + /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]') + result.start = 1; + result.count = arguments->size() - 1; + } + + void findTableEngineSecretArguments() + { + const String & engine_name = function.name; + if (engine_name == "ExternalDistributed") + { + /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') + findExternalDistributedTableEngineSecretArguments(); + } + else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") || + (engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB")) + { + /// MySQL('host:port', 'database', 'table', 'user', 'password', ...) + /// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) + /// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) + /// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...) + findMySQLFunctionSecretArguments(); + } + else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") || + (engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue")) + { + /// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...) + findS3TableEngineSecretArguments(); + } + else if (engine_name == "URL") + { + findURLSecretArguments(); + } + } + + void findExternalDistributedTableEngineSecretArguments() + { + if (isNamedCollectionName(1)) + { + /// ExternalDistributed('engine', named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 2); + } + else + { + /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') + markSecretArgument(5); + } + } + + void findS3TableEngineSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// S3(named_collection, ..., secret_access_key = 'secret_access_key') + findSecretNamedArgument("secret_access_key", 1); + return; + } + + /// We should check other arguments first because we don't need to do any replacement in case of + /// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)]) + /// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)]) + size_t count = excludeS3OrURLNestedMaps(); + if ((3 <= count) && (count <= 4)) + { + String second_arg; + if (tryGetStringFromArgument(1, &second_arg)) + { + if (boost::iequals(second_arg, "NOSIGN")) + return; /// The argument after 'url' is "NOSIGN". + + if (count == 3) + { + if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + return; /// The argument after 'url' is a format: S3('url', 'format', ...)
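+ /// With count == 4 and no NOSIGN the signature can only be S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format'), so the second argument is not checked against known format names.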
+ } + } + } + + /// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures: + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') + /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') + if (2 < count) + markSecretArgument(2); + } + + void findDatabaseEngineSecretArguments() + { + const String & engine_name = function.name; + if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") || + (engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") || + (engine_name == "MaterializedPostgreSQL")) + { + /// MySQL('host:port', 'database', 'user', 'password') + /// PostgreSQL('host:port', 'database', 'user', 'password') + findMySQLDatabaseSecretArguments(); + } + else if (engine_name == "S3") + { + /// S3('url', 'access_key_id', 'secret_access_key') + findS3DatabaseSecretArguments(); + } + } + + void findMySQLDatabaseSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// MySQL(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + } + else + { + /// MySQL('host:port', 'database', 'user', 'password') + markSecretArgument(3); + } + } + + void findS3DatabaseSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// S3(named_collection, ..., secret_access_key = 'password', ...) + findSecretNamedArgument("secret_access_key", 1); + } + else + { + /// S3('url', 'access_key_id', 'secret_access_key') + markSecretArgument(2); + } + } + + void findBackupNameSecretArguments() + { + const String & engine_name = function.name; + if (engine_name == "S3") + { + /// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key]) + markSecretArgument(2); + } + } + + /// Checks whether a specified argument can be the name of a named collection. + bool isNamedCollectionName(size_t arg_idx) const + { + if (arguments->size() <= arg_idx) + return false; + + const auto * identifier = (*arguments)[arg_idx]->as<ASTIdentifier>(); + return identifier != nullptr; + } + + /// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
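+ /// E.g. findSecretNamedArgument("password", 1) marks the value of a password = '...' argument while skipping the named collection name at index 0.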
+ void findSecretNamedArgument(const std::string_view & key, size_t start = 0) + { + for (size_t i = start; i < arguments->size(); ++i) + { + const auto & argument = (*arguments)[i]; + const auto * equals_func = argument->as<ASTFunction>(); + if (!equals_func || (equals_func->name != "equals")) + continue; + + const auto * expr_list = equals_func->arguments->as<ASTExpressionList>(); + if (!expr_list) + continue; + + const auto & equal_args = expr_list->children; + if (equal_args.size() != 2) + continue; + + String found_key; + if (!tryGetStringFromArgument(*equal_args[0], &found_key)) + continue; + + if (found_key == key) + markSecretArgument(i, /* argument_is_named= */ true); + } + } +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 29b26b4e3fe..a54a2b0eda9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -416,8 +416,9 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserToken s_dash(TokenType::Minus); if (s_dash.ignore(pos, expected)) { - String tmp_op(op_pos_begin->begin, pos->end); - kql_operator = tmp_op; + if (!isValidKQLPos(pos)) + return false; + kql_operator = String(op_pos_begin->begin, pos->end); } else --pos; diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index efd8f4faa42..bc1fb30781d 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1181,7 +1181,7 @@ PlannerContextPtr buildPlannerContext(const QueryTreeNodePtr & query_tree_node, if (select_query_options.is_subquery) updateContextForSubqueryExecution(mutable_context); - return std::make_shared<PlannerContext>(mutable_context, std::move(global_planner_context)); + return std::make_shared<PlannerContext>(mutable_context, std::move(global_planner_context), select_query_options); } Planner::Planner(const QueryTreeNodePtr & query_tree_, @@ -1373,7 +1373,7 @@ void Planner::buildPlanForQueryNode() const auto & settings = query_context->getSettingsRef(); if (query_context->canUseTaskBasedParallelReplicas()) { - if (planner_context->getPreparedSets().hasSubqueries()) + if (!settings.parallel_replicas_allow_in_with_subquery && planner_context->getPreparedSets().hasSubqueries()) { if (settings.allow_experimental_parallel_reading_from_replicas >= 2) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "IN with subquery is not supported with parallel replicas"); diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index c417d463c73..af23e684f23 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -44,6 +44,27 @@ namespace ErrorCodes namespace { +/* Calculates Action node name for ConstantNode. + * + * If converting to AST would add a '_CAST' function call, + * the resulting action name will also include it.
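+ * E.g. a UInt64 constant 42 is then named _CAST(42_UInt64, 'UInt64'_String) rather than plain 42_UInt64.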
+ */ +String calculateActionNodeNameWithCastIfNeeded(const ConstantNode & constant_node) +{ + WriteBufferFromOwnString buffer; + if (constant_node.requiresCastCall()) + buffer << "_CAST("; + + buffer << calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + + if (constant_node.requiresCastCall()) + { + buffer << ", '" << constant_node.getResultType()->getName() << "'_String)"; + } + + return buffer.str(); +} + class ActionNodeNameHelper { public: @@ -88,7 +109,49 @@ public: case QueryTreeNodeType::CONSTANT: { const auto & constant_node = node->as(); - result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + /* To ensure that headers match during distributed query we need to simulate action node naming on + * secondary servers. If we don't do that headers will mismatch due to constant folding. + * + * +--------+ + * -----------------| Server |---------------- + * / +--------+ \ + * / \ + * v v + * +-----------+ +-----------+ + * | Initiator | ------ | Secondary |------ + * +-----------+ / +-----------+ \ + * | / \ + * | / \ + * v / \ + * +---------------+ v v + * | Wrap in _CAST | +----------------------------+ +----------------------+ + * | if needed | | Constant folded from _CAST | | Constant folded from | + * +---------------+ +----------------------------+ | another expression | + * | +----------------------+ + * v | + * +----------------------------+ v + * | Name ConstantNode the same | +--------------------------+ + * | as on initiator server | | Generate action name for | + * | (wrap in _CAST if needed) | | original expression | + * +----------------------------+ +--------------------------+ + */ + if (planner_context.isASTLevelOptimizationAllowed()) + { + result = calculateActionNodeNameWithCastIfNeeded(constant_node); + } + else + { + // Need to check if constant folded from QueryNode until https://github.com/ClickHouse/ClickHouse/issues/60847 is fixed. + if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY) + { + if (constant_node.receivedFromInitiatorServer()) + result = calculateActionNodeNameWithCastIfNeeded(constant_node); + else + result = calculateActionNodeName(constant_node.getSourceExpression()); + } + else + result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + } break; } case QueryTreeNodeType::FUNCTION: @@ -530,7 +593,52 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi const auto & constant_literal = constant_node.getValue(); const auto & constant_type = constant_node.getResultType(); - auto constant_node_name = calculateConstantActionNodeName(constant_literal, constant_type); + auto constant_node_name = [&]() + { + /* To ensure that headers match during distributed query we need to simulate action node naming on + * secondary servers. If we don't do that headers will mismatch due to constant folding. 
+ * + * +--------+ + * -----------------| Server |---------------- + * / +--------+ \ + * / \ + * v v + * +-----------+ +-----------+ + * | Initiator | ------ | Secondary |------ + * +-----------+ / +-----------+ \ + * | / \ + * | / \ + * v / \ + * +---------------+ v v + * | Wrap in _CAST | +----------------------------+ +----------------------+ + * | if needed | | Constant folded from _CAST | | Constant folded from | + * +---------------+ +----------------------------+ | another expression | + * | +----------------------+ + * v | + * +----------------------------+ v + * | Name ConstantNode the same | +--------------------------+ + * | as on initiator server | | Generate action name for | + * | (wrap in _CAST if needed) | | original expression | + * +----------------------------+ +--------------------------+ + */ + if (planner_context->isASTLevelOptimizationAllowed()) + { + return calculateActionNodeNameWithCastIfNeeded(constant_node); + } + else + { + // Need to check if constant folded from QueryNode until https://github.com/ClickHouse/ClickHouse/issues/60847 is fixed. + if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY) + { + if (constant_node.receivedFromInitiatorServer()) + return calculateActionNodeNameWithCastIfNeeded(constant_node); + else + return action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression()); + } + else + return calculateConstantActionNodeName(constant_literal, constant_type); + } + }(); ColumnWithTypeAndName column; column.name = constant_node_name; diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp index 422c8c1d01f..f33255f0a44 100644 --- a/src/Planner/PlannerContext.cpp +++ b/src/Planner/PlannerContext.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -41,9 +42,10 @@ bool GlobalPlannerContext::hasColumnIdentifier(const ColumnIdentifier & column_i return column_identifiers.contains(column_identifier); } -PlannerContext::PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_) +PlannerContext::PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_, const SelectQueryOptions & select_query_options_) : query_context(std::move(query_context_)) , global_planner_context(std::move(global_planner_context_)) + , is_ast_level_optimization_allowed(!(query_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY || select_query_options_.ignore_ast_optimizations)) {} TableExpressionData & PlannerContext::getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node) diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index 853f55a5f00..4d9ba037cac 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -10,6 +10,7 @@ #include #include +#include namespace DB { @@ -78,7 +79,7 @@ class PlannerContext { public: /// Create planner context with query context and global planner context - PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_); + PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_, const SelectQueryOptions & select_query_options_); /// Get planner context query context ContextPtr getQueryContext() const @@ -165,6 +166,12 @@ public: static SetKey createSetKey(const DataTypePtr & left_operand_type, const QueryTreeNodePtr & set_source_node); PreparedSets & getPreparedSets() { 
return prepared_sets; } + + /// Returns false if any of the following conditions is met: + /// 1. Query is executed on a follower node. + /// 2. ignore_ast_optimizations is set. + bool isASTLevelOptimizationAllowed() const { return is_ast_level_optimization_allowed; } + private: /// Query context ContextMutablePtr query_context; @@ -172,6 +179,8 @@ private: /// Global planner context GlobalPlannerContextPtr global_planner_context; + bool is_ast_level_optimization_allowed; + /// Column node to column identifier std::unordered_map column_node_to_column_identifier; diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index 95afcb605b3..ef640bcd42d 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -156,7 +157,8 @@ QueryTreeNodePtr replaceTablesWithDummyTables(const QueryTreeNodePtr & query, co /// Otherwise we can execute current query up to WithMergableStage only. const QueryNode * findQueryForParallelReplicas( std::stack<const QueryNode *> stack, - const std::unordered_map<const QueryNode *, const QueryPlan::Node *> & mapping) + const std::unordered_map<const QueryNode *, const QueryPlan::Node *> & mapping, + const Settings & settings) { const QueryPlan::Node * prev_checked_node = nullptr; const QueryNode * res = nullptr; @@ -192,7 +194,11 @@ const QueryNode * findQueryForParallelReplicas( { const auto * expression = typeid_cast<const ExpressionStep *>(step); const auto * filter = typeid_cast<const FilterStep *>(step); - if (!expression && !filter) + + const auto * creating_sets = typeid_cast<const DelayedCreatingSetsStep *>(step); + bool allowed_creating_sets = settings.parallel_replicas_allow_in_with_subquery && creating_sets; + + if (!expression && !filter && !allowed_creating_sets) can_distribute_full_node = false; next_node_to_check = children.front(); @@ -274,7 +280,7 @@ const QueryNode * findQueryForParallelReplicas(const QueryTreeNodePtr & query_tr /// So that we build a list of candidates again, and call findQueryForParallelReplicas for it. auto new_stack = getSupportingParallelReplicasQuery(updated_query_tree.get()); const auto & mapping = planner.getQueryNodeToPlanStepMapping(); - const auto * res = findQueryForParallelReplicas(new_stack, mapping); + const auto * res = findQueryForParallelReplicas(new_stack, mapping, context->getSettingsRef()); /// Now, return a query from initial stack.
if (res) diff --git a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp index c2ee5923c01..3578401a0f8 100644 --- a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp @@ -12,6 +12,7 @@ #include #include +#include "DataTypes/IDataType.h" #include #include @@ -35,9 +36,12 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ + constexpr auto FORMAT_NAME = "Prometheus"; -static bool isDataTypeMapString(const DataTypePtr & type) +bool isDataTypeMapString(const DataTypePtr & type) { if (!isMap(type)) return false; @@ -45,8 +49,8 @@ static bool isDataTypeMapString(const DataTypePtr & type) return isStringOrFixedString(type_map->getKeyType()) && isStringOrFixedString(type_map->getValueType()); } -template -static void getColumnPos(const Block & header, const String & col_name, Pred pred, ResType & res) +template +void getColumnPos(const Block & header, const String & col_name, bool (*pred)(const DataTypePtr &), ResType & res) { static_assert(std::is_same_v || std::is_same_v>, "Illegal ResType"); @@ -71,7 +75,7 @@ static void getColumnPos(const Block & header, const String & col_name, Pred pre } } -static Float64 tryParseFloat(const String & s) +Float64 tryParseFloat(const String & s) { Float64 t = 0; ReadBufferFromString buf(s); @@ -79,6 +83,8 @@ static Float64 tryParseFloat(const String & s) return t; } +} + PrometheusTextOutputFormat::PrometheusTextOutputFormat( WriteBuffer & out_, const Block & header_, @@ -89,12 +95,12 @@ PrometheusTextOutputFormat::PrometheusTextOutputFormat( { const Block & header = getPort(PortKind::Main).getHeader(); - getColumnPos(header, "name", isStringOrFixedString, pos.name); - getColumnPos(header, "value", isNumber, pos.value); + getColumnPos(header, "name", isStringOrFixedString, pos.name); + getColumnPos(header, "value", isNumber, pos.value); - getColumnPos(header, "help", isStringOrFixedString, pos.help); - getColumnPos(header, "type", isStringOrFixedString, pos.type); - getColumnPos(header, "timestamp", isNumber, pos.timestamp); + getColumnPos(header, "help", isStringOrFixedString, pos.help); + getColumnPos(header, "type", isStringOrFixedString, pos.type); + getColumnPos(header, "timestamp", isNumber, pos.timestamp); getColumnPos(header, "labels", isDataTypeMapString, pos.labels); } diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 845cf561968..28160b18269 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 28f0f979499..91f4213ff43 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -431,7 +431,7 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( { const auto & keys = aggregating.getParams().keys; const auto & aggregates = aggregating.getParams().aggregates; - Block key_virtual_columns = reading.getMergeTreeData().getSampleBlockWithVirtualColumns(); + Block key_virtual_columns = reading.getMergeTreeData().getHeaderWithVirtualsForFilter(); 
AggregateProjectionCandidates candidates; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index 73caf70627f..cac172a856f 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -135,7 +135,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) std::list candidates; NormalProjectionCandidate * best_candidate = nullptr; - const Names & required_columns = reading->getRealColumnNames(); + const Names & required_columns = reading->getAllColumnNames(); const auto & parts = reading->getParts(); const auto & alter_conversions = reading->getAlterConvertionsForParts(); const auto & query_info = reading->getQueryInfo(); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 0665aec59ea..21e3cfcceab 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -261,30 +261,24 @@ void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, c ReadFromMergeTree::ReadFromMergeTree( MergeTreeData::DataPartsVector parts_, std::vector alter_conversions_, - const Names & column_names_, - Names real_column_names_, - Names virt_column_names_, + Names all_column_names_, const MergeTreeData & data_, const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, const ContextPtr & context_, size_t max_block_size_, size_t num_streams_, - bool sample_factor_column_queried_, std::shared_ptr max_block_numbers_to_read_, LoggerPtr log_, AnalysisResultPtr analyzed_result_ptr_, bool enable_parallel_reading) : SourceStepWithFilter(DataStream{.header = MergeTreeSelectProcessor::transformHeader( - storage_snapshot_->getSampleBlockForColumns(real_column_names_), - query_info_.prewhere_info, - data_.getPartitionValueType(), - virt_column_names_)}, column_names_, query_info_, storage_snapshot_, context_) + storage_snapshot_->getSampleBlockForColumns(all_column_names_), + query_info_.prewhere_info)}, all_column_names_, query_info_, storage_snapshot_, context_) , reader_settings(getMergeTreeReaderSettings(context_, query_info_)) , prepared_parts(std::move(parts_)) , alter_conversions_for_parts(std::move(alter_conversions_)) - , real_column_names(std::move(real_column_names_)) - , virt_column_names(std::move(virt_column_names_)) + , all_column_names(std::move(all_column_names_)) , data(data_) , actions_settings(ExpressionActionsSettings::fromContext(context_)) , metadata_for_reading(storage_snapshot->getMetadataForQuery()) @@ -293,20 +287,11 @@ ReadFromMergeTree::ReadFromMergeTree( .preferred_block_size_bytes = context->getSettingsRef().preferred_block_size_bytes, .preferred_max_column_in_block_size_bytes = context->getSettingsRef().preferred_max_column_in_block_size_bytes} , requested_num_streams(num_streams_) - , sample_factor_column_queried(sample_factor_column_queried_) , max_block_numbers_to_read(std::move(max_block_numbers_to_read_)) , log(std::move(log_)) , analyzed_result_ptr(analyzed_result_ptr_) , is_parallel_reading_from_replicas(enable_parallel_reading) { - if (sample_factor_column_queried) - { - /// Only _sample_factor virtual column is added by ReadFromMergeTree - /// Other virtual columns are added by MergeTreeSelectProcessor. 
- auto type = std::make_shared(); - output_stream->header.insert({type->createColumn(), type, "_sample_factor"}); - } - if (is_parallel_reading_from_replicas) { all_ranges_callback = context->getMergeTreeAllRangesCallback(); @@ -368,12 +353,12 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( auto pool = std::make_shared( std::move(extension), std::move(parts_with_range), + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); @@ -387,8 +372,8 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( auto algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), data, prewhere_info, - actions_settings, block_size_copy, reader_settings, virt_column_names); + pool, std::move(algorithm), storage_snapshot, prewhere_info, + actions_settings, block_size_copy, reader_settings); auto source = std::make_shared(std::move(processor)); pipes.emplace_back(std::move(source)); @@ -449,12 +434,12 @@ Pipe ReadFromMergeTree::readFromPool( { pool = std::make_shared( std::move(parts_with_range), + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); } @@ -462,12 +447,12 @@ Pipe ReadFromMergeTree::readFromPool( { pool = std::make_shared( std::move(parts_with_range), + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); } @@ -486,8 +471,8 @@ Pipe ReadFromMergeTree::readFromPool( auto algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), data, prewhere_info, - actions_settings, block_size_copy, reader_settings, virt_column_names); + pool, std::move(algorithm), storage_snapshot, prewhere_info, + actions_settings, block_size_copy, reader_settings); auto source = std::make_shared(std::move(processor)); @@ -538,12 +523,12 @@ Pipe ReadFromMergeTree::readInOrder( std::move(extension), mode, parts_with_ranges, + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); } @@ -553,12 +538,12 @@ Pipe ReadFromMergeTree::readInOrder( has_limit_below_one_block, read_type, parts_with_ranges, + shared_virtual_fields, storage_snapshot, prewhere_info, actions_settings, reader_settings, required_columns, - virt_column_names, pool_settings, context); } @@ -592,8 +577,8 @@ Pipe ReadFromMergeTree::readInOrder( algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), data, prewhere_info, - actions_settings, block_size, reader_settings, virt_column_names); + pool, std::move(algorithm), storage_snapshot, prewhere_info, + actions_settings, block_size, reader_settings); processor->addPartLevelToChunk(isQueryWithFinal()); @@ -1302,8 +1287,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( requested_num_streams, max_block_numbers_to_read, data, - real_column_names, - sample_factor_column_queried, + all_column_names, log, indexes); } @@ -1489,8 +1473,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, - const Names & real_column_names, - bool sample_factor_column_queried, + const Names & all_column_names, LoggerPtr log, std::optional & indexes) { @@ -1503,8 +1486,7 @@ 
ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( num_streams, max_block_numbers_to_read, data, - real_column_names, - sample_factor_column_queried, + all_column_names, log, indexes); } @@ -1518,8 +1500,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, - const Names & real_column_names, - bool sample_factor_column_queried, + const Names & all_column_names, LoggerPtr log, std::optional & indexes) { @@ -1528,7 +1509,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( size_t total_parts = parts.size(); - result.column_names_to_read = real_column_names; + result.column_names_to_read = all_column_names; /// If there are only virtual columns in the query, you must request at least one non-virtual one. if (result.column_names_to_read.empty()) @@ -1587,7 +1568,6 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( data, metadata_snapshot, context_, - sample_factor_column_queried, log); if (result.sampling.read_nothing) @@ -1704,10 +1684,8 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info prewhere_info = prewhere_info_value; output_stream = DataStream{.header = MergeTreeSelectProcessor::transformHeader( - storage_snapshot->getSampleBlockForColumns(real_column_names), - prewhere_info_value, - data.getPartitionValueType(), - virt_column_names)}; + storage_snapshot->getSampleBlockForColumns(all_column_names), + prewhere_info_value)}; updateSortDescriptionForOutputStream( *output_stream, @@ -1901,6 +1879,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons storage_snapshot->data = std::make_unique(); result.checkLimits(context->getSettingsRef(), query_info); + shared_virtual_fields.emplace("_sample_factor", result.sampling.used_sample_factor); LOG_DEBUG( log, @@ -1985,18 +1964,6 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(*actions)); }; - /// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values. 
- if (sample_factor_column_queried) - { - ColumnWithTypeAndName column; - column.name = "_sample_factor"; - column.type = std::make_shared(); - column.column = column.type->createColumnConst(0, Field(result.sampling.used_sample_factor)); - - auto adding_column = ActionsDAG::makeAddingColumnActions(std::move(column)); - append_actions(std::move(adding_column)); - } - if (result_projection) cur_header = result_projection->updateHeader(cur_header); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 8efe5ed7b13..5ed742a9bfd 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -110,16 +110,13 @@ public: ReadFromMergeTree( MergeTreeData::DataPartsVector parts_, std::vector alter_conversions_, - const Names & column_names_, - Names real_column_names_, - Names virt_column_names_, + Names all_column_names_, const MergeTreeData & data_, const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context_, size_t max_block_size_, size_t num_streams_, - bool sample_factor_column_queried_, std::shared_ptr max_block_numbers_to_read_, LoggerPtr log_, AnalysisResultPtr analyzed_result_ptr_, @@ -136,8 +133,7 @@ public: void describeActions(JSONBuilder::JSONMap & map) const override; void describeIndexes(JSONBuilder::JSONMap & map) const override; - const Names & getRealColumnNames() const { return real_column_names; } - const Names & getVirtualColumnNames() const { return virt_column_names; } + const Names & getAllColumnNames() const { return all_column_names; } StorageID getStorageID() const { return data.getStorageID(); } UInt64 getSelectedParts() const { return selected_parts; } @@ -164,8 +160,7 @@ public: size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, - const Names & real_column_names, - bool sample_factor_column_queried, + const Names & all_column_names, LoggerPtr log, std::optional & indexes); @@ -209,8 +204,7 @@ private: size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, - const Names & real_column_names, - bool sample_factor_column_queried, + const Names & all_column_names, LoggerPtr log, std::optional & indexes); @@ -227,8 +221,7 @@ private: MergeTreeData::DataPartsVector prepared_parts; std::vector alter_conversions_for_parts; - Names real_column_names; - Names virt_column_names; + Names all_column_names; const MergeTreeData & data; ExpressionActionsSettings actions_settings; @@ -239,7 +232,6 @@ private: size_t requested_num_streams; size_t output_streams_limit = 0; - const bool sample_factor_column_queried; /// Used for aggregation optimization (see DB::QueryPlanOptimizations::tryAggregateEachPartitionIndependently). 
bool output_each_partition_through_separate_port = false; @@ -280,7 +272,9 @@ private: RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection); ReadFromMergeTree::AnalysisResult getAnalysisResult() const; + AnalysisResultPtr analyzed_result_ptr; + VirtualFields shared_virtual_fields; bool is_parallel_reading_from_replicas; std::optional all_ranges_callback; diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 33bd824dd5c..3d7dd3f76b8 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -71,19 +71,36 @@ ISource::Status RemoteSource::prepare() if (is_async_state) return Status::Async; + if (executor_finished) + return Status::Finished; + Status status = ISource::prepare(); /// To avoid resetting the connection (because of "unfinished" query) in the /// RemoteQueryExecutor it should be finished explicitly. if (status == Status::Finished) { - query_executor->finish(); is_async_state = false; - return status; + need_drain = true; + return Status::Ready; } return status; } +void RemoteSource::work() +{ + /// Connection drain is a heavy operation that may take a long time. + /// Therefore we move connection drain from prepare() to work(), and drain multiple connections in parallel. + /// See issue: https://github.com/ClickHouse/ClickHouse/issues/60844 + if (need_drain) + { + query_executor->finish(); + executor_finished = true; + return; + } + ISource::work(); +} + std::optional RemoteSource::tryGenerate() { /// onCancel() will do the cancel if the query was sent. diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index 4e179371cf5..052567bc261 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -22,6 +22,7 @@ public: ~RemoteSource() override; Status prepare() override; + void work() override; String getName() const override { return "Remote"; } void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit.swap(counter); } @@ -39,6 +40,8 @@ protected: private: bool was_query_sent = false; + bool need_drain = false; + bool executor_finished = false; bool add_aggregation_info = false; RemoteQueryExecutorPtr query_executor; RowsBeforeLimitCounterPtr rows_before_limit; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index dd07d043599..aa5a1c0cc1a 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -451,7 +451,7 @@ Chain buildPushingToViewsChain( /// If we don't write directly to the destination /// then expect that we're inserting with precalculated virtual columns - auto storage_header = no_destination ? metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtuals()) + auto storage_header = no_destination ? metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtualsList()) : metadata_snapshot->getSampleBlock(); /** TODO This is a very important line. At any insertion into the table one of chains should own lock. 
@@ -597,7 +597,7 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat views_data.source_storage_id, views_data.source_metadata_snapshot->getColumns(), std::move(block), - views_data.source_storage->getVirtuals())); + *views_data.source_storage->getVirtualsPtr())); QueryPipelineBuilder pipeline; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index d883029408c..9637e5bf9ec 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -933,9 +933,26 @@ void TCPHandler::processInsertQuery() if (insert_queue && async_insert_enabled && !insert_query.select) { + /// Let's agree on terminology and say that a mini-INSERT is an asynchronous INSERT + /// which typically contains not a lot of data inside, and a big-INSERT is an INSERT + /// which was formed by concatenating several mini-INSERTs together. + /// If the client had to retry some mini-INSERTs, they will be properly deduplicated + /// by the source tables. This functionality is controlled by a setting `async_insert_deduplicate`. + /// But then they will be glued together into a block and pushed through a chain of Materialized Views if any. + /// The process of forming such blocks is not deterministic, so each time we retry mini-INSERTs the resulting + /// block may be concatenated differently. + /// That's why deduplication in dependent Materialized Views doesn't make sense in the presence of async INSERTs. + if (settings.throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert && + settings.deduplicate_blocks_in_dependent_materialized_views) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Deduplication in dependent materialized views cannot work together with async inserts. "\ + "Please disable either the `deduplicate_blocks_in_dependent_materialized_views` or the `async_insert` setting."); + auto result = processAsyncInsertQuery(*insert_queue); if (result.status == AsynchronousInsertQueue::PushResult::OK) { + /// Reset pipeline because it may hold write lock for some storages. + state.io.pipeline.reset(); if (settings.wait_for_async_insert) { size_t timeout_ms = settings.wait_for_async_insert_timeout.totalMilliseconds(); @@ -968,7 +985,7 @@ void TCPHandler::processInsertQuery() else { PushingPipelineExecutor executor(state.io.pipeline); - run_executor(executor, processed_block); + run_executor(executor, std::move(processed_block)); } sendInsertProfileEvents(); diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index b09200f06ff..eae5e1a8a47 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -31,8 +31,6 @@ #include #include #include -#include -#include #include #include #include @@ -965,8 +963,7 @@ bool AlterCommand::isRequireMutationStage(const StorageInMemoryMetadata & metada /// Dropping an alias is a metadata-only alter; in other cases a mutation is required.
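 /// E.g. DROP COLUMN on a physical column returns true here (a mutation is needed), while dropping an ALIAS column returns false.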
if (type == DROP_COLUMN) - return metadata.columns.hasColumnOrNested(GetColumnsOptions::AllPhysical, column_name) || - column_name == LightweightDeleteDescription::FILTER_COLUMN.name || column_name == BlockNumberColumn::name; + return metadata.columns.hasColumnOrNested(GetColumnsOptions::AllPhysical, column_name); if (type != MODIFY_COLUMN || data_type == nullptr) return false; @@ -1256,7 +1253,9 @@ void AlterCommands::prepare(const StorageInMemoryMetadata & metadata) void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const { - const StorageInMemoryMetadata & metadata = table->getInMemoryMetadata(); + const auto & metadata = table->getInMemoryMetadata(); + auto virtuals = table->getVirtualsPtr(); + auto all_columns = metadata.columns; /// Default expression for all added/modified columns ASTPtr default_expr_list = std::make_shared(); @@ -1292,16 +1291,20 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const if (command.data_type->hasDynamicSubcolumns()) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Adding a new column of a type which has dynamic subcolumns to an existing table is not allowed. It has known bugs"); - if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast(table)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add column {}: " - "this column name is reserved for lightweight delete feature", backQuote(column_name)); - - if (column_name == BlockNumberColumn::name && std::dynamic_pointer_cast(table)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add column {}: " - "this column name is reserved for _block_number persisting feature", backQuote(column_name)); + if (virtuals->tryGet(column_name, VirtualsKind::Persistent)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot add column {}: this column name is reserved for persistent virtual column", backQuote(column_name)); if (command.codec) - CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec, context->getSettingsRef().enable_zstd_qat_codec); + { + const auto & settings = context->getSettingsRef(); + CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( + command.codec, command.data_type, + !settings.allow_suspicious_codecs, + settings.allow_experimental_codecs, + settings.enable_deflate_qpl_codec, + settings.enable_zstd_qat_codec); + } all_columns.add(ColumnDescription(column_name, command.data_type)); } @@ -1415,9 +1418,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const } else if (command.type == AlterCommand::DROP_COLUMN) { - if (all_columns.has(command.column_name) || - all_columns.hasNested(command.column_name) || - (command.clear && column_name == LightweightDeleteDescription::FILTER_COLUMN.name)) + if (all_columns.has(command.column_name) || all_columns.hasNested(command.column_name)) { if (!command.clear) /// CLEAR column is Ok even if there are dependencies. 
{ @@ -1501,16 +1502,12 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const } if (all_columns.has(command.rename_to)) - throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Cannot rename to {}: " - "column with this name already exists", backQuote(command.rename_to)); + throw Exception(ErrorCodes::DUPLICATE_COLUMN, + "Cannot rename to {}: column with this name already exists", backQuote(command.rename_to)); - if (command.rename_to == LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast(table)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot rename to {}: " - "this column name is reserved for lightweight delete feature", backQuote(command.rename_to)); - - if (command.rename_to == BlockNumberColumn::name && std::dynamic_pointer_cast(table)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot rename to {}: " - "this column name is reserved for _block_number persisting feature", backQuote(command.rename_to)); + if (virtuals->tryGet(command.rename_to, VirtualsKind::Persistent)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot rename to {}: this column name is reserved for persistent virtual column", backQuote(command.rename_to)); if (modified_columns.contains(column_name)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rename and modify the same column {} " diff --git a/src/Storages/BlockNumberColumn.cpp b/src/Storages/BlockNumberColumn.cpp deleted file mode 100644 index 8c9e1fd902a..00000000000 --- a/src/Storages/BlockNumberColumn.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include -#include - -namespace DB -{ - -CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); - -CompressionCodecPtr getCompressionCodecForBlockNumberColumn() -{ - std::vector codecs; - codecs.reserve(2); - auto data_bytes_size = BlockNumberColumn::type->getSizeOfValueInMemory(); - codecs.emplace_back(getCompressionCodecDelta(data_bytes_size)); - codecs.emplace_back(CompressionCodecFactory::instance().get("LZ4", {})); - return std::make_shared(codecs); -} - -const String BlockNumberColumn::name = "_block_number"; -const DataTypePtr BlockNumberColumn::type = std::make_shared(); -const CompressionCodecPtr BlockNumberColumn::compression_codec = getCompressionCodecForBlockNumberColumn(); - -} diff --git a/src/Storages/BlockNumberColumn.h b/src/Storages/BlockNumberColumn.h deleted file mode 100644 index fffa68bfd49..00000000000 --- a/src/Storages/BlockNumberColumn.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include -#include -#include - -namespace DB -{ - -struct BlockNumberColumn -{ - static const String name; - static const DataTypePtr type; - static const CompressionCodecPtr compression_codec; -}; - -} diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index d6a241da032..e08dac3a332 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -31,15 +31,11 @@ #include #include #include -#include namespace DB { -CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); - - namespace ErrorCodes { extern const int NO_SUCH_COLUMN_IN_TABLE; @@ -482,6 +478,10 @@ NamesAndTypesList ColumnsDescription::get(const GetColumnsOptions & options) con NamesAndTypesList res; switch (options.kind) { + case GetColumnsOptions::None: + { + break; + } case GetColumnsOptions::All: { res = getAll(); @@ -559,6 +559,12 @@ const ColumnDescription & ColumnsDescription::get(const String & column_name) co return *it; } +const ColumnDescription * ColumnsDescription::tryGet(const String & 
column_name) const +{ + auto it = columns.get<1>().find(column_name); + return it == columns.get<1>().end() ? nullptr : &(*it); +} + static GetColumnsOptions::Kind defaultKindToGetKind(ColumnDefaultKind kind) { switch (kind) @@ -572,7 +578,8 @@ static GetColumnsOptions::Kind defaultKindToGetKind(ColumnDefaultKind kind) case ColumnDefaultKind::Ephemeral: return GetColumnsOptions::Ephemeral; } - UNREACHABLE(); + + return GetColumnsOptions::None; } NamesAndTypesList ColumnsDescription::getByNames(const GetColumnsOptions & options, const Names & names) const @@ -784,33 +791,6 @@ bool ColumnsDescription::hasCompressionCodec(const String & column_name) const return it != columns.get<1>().end() && it->codec != nullptr; } -CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const -{ - const auto it = columns.get<1>().find(column_name); - - if (it == columns.get<1>().end() || !it->codec) - return default_codec; - - return CompressionCodecFactory::instance().get(it->codec, it->type, default_codec); -} - -CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name) const -{ - assert (column_name != BlockNumberColumn::name); - return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec()); -} - -ASTPtr ColumnsDescription::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const -{ - assert (column_name != BlockNumberColumn::name); - const auto it = columns.get<1>().find(column_name); - - if (it == columns.get<1>().end() || !it->codec) - return default_codec->getFullCodecDesc(); - - return it->codec; -} - ColumnsDescription::ColumnTTLs ColumnsDescription::getColumnTTLs() const { ColumnTTLs ret; diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 5ba655ee10d..82e55e29073 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -29,10 +29,19 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +enum class VirtualsKind : UInt8 +{ + None = 0, + Ephemeral = 1, + Persistent = 2, + All = Ephemeral | Persistent, +}; + struct GetColumnsOptions { enum Kind : UInt8 { + None = 0, Ordinary = 1, Materialized = 2, Aliases = 4, @@ -51,9 +60,9 @@ struct GetColumnsOptions return *this; } - GetColumnsOptions & withVirtuals(bool value = true) + GetColumnsOptions & withVirtuals(VirtualsKind value = VirtualsKind::All) { - with_virtuals = value; + virtuals_kind = value; return *this; } @@ -63,17 +72,11 @@ struct GetColumnsOptions return *this; } - GetColumnsOptions & withSystemColumns(bool value = true) - { - with_system_columns = value; - return *this; - } - Kind kind; + VirtualsKind virtuals_kind = VirtualsKind::None; + bool with_subcolumns = false; - bool with_virtuals = false; bool with_extended_objects = false; - bool with_system_columns = false; }; /// Description of a single table column (in CREATE TABLE for example). 
@@ -160,6 +163,7 @@ public: bool hasNested(const String & column_name) const; bool hasSubcolumn(const String & column_name) const; const ColumnDescription & get(const String & column_name) const; + const ColumnDescription * tryGet(const String & column_name) const; template void modify(const String & column_name, F && f) @@ -213,9 +217,6 @@ public: /// Does column has non default specified compression codec bool hasCompressionCodec(const String & column_name) const; - CompressionCodecPtr getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; - CompressionCodecPtr getCodecOrDefault(const String & column_name) const; - ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; String toString() const; static ColumnsDescription parse(const String & str); @@ -269,4 +270,5 @@ private: /// don't have strange constructions in default expression like SELECT query or /// arrayJoin function. Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, ContextPtr context); + } diff --git a/src/Storages/FileLog/FileLogSource.cpp b/src/Storages/FileLog/FileLogSource.cpp index b1192af4ced..eb3ff0436a5 100644 --- a/src/Storages/FileLog/FileLogSource.cpp +++ b/src/Storages/FileLog/FileLogSource.cpp @@ -31,7 +31,7 @@ FileLogSource::FileLogSource( , max_streams_number(max_streams_number_) , handle_error_mode(handle_error_mode_) , non_virtual_header(storage_snapshot->metadata->getSampleBlockNonMaterialized()) - , virtual_header(storage_snapshot->getSampleBlockForColumns(storage.getVirtuals().getNames())) + , virtual_header(storage_snapshot->virtual_columns->getSampleBlock()) { consumer = std::make_unique(storage, max_block_size, poll_time_out, context, stream_number_, max_streams_number_); diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index b0bac081d38..a5f2331a068 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -147,6 +147,7 @@ StorageFileLog::StorageFileLog( storage_metadata.setColumns(columns_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals(filelog_settings->handle_error_mode)); if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath())) { @@ -203,6 +204,22 @@ StorageFileLog::StorageFileLog( } } +VirtualColumnsDescription StorageFileLog::createVirtuals(StreamingHandleErrorMode handle_error_mode) +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_filename", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_offset", std::make_shared(), ""); + + if (handle_error_mode == StreamingHandleErrorMode::STREAM) + { + desc.addEphemeral("_raw_record", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_error", std::make_shared(std::make_shared()), ""); + } + + return desc; +} + void StorageFileLog::loadMetaFiles(bool attach) { /// Attach table @@ -1009,19 +1026,4 @@ bool StorageFileLog::updateFileInfos() return events.empty() || file_infos.file_names.empty(); } -NamesAndTypesList StorageFileLog::getVirtuals() const -{ - auto virtuals = NamesAndTypesList{ - {"_filename", std::make_shared(std::make_shared())}, - {"_offset", std::make_shared()}}; - - if (filelog_settings->handle_error_mode == StreamingHandleErrorMode::STREAM) - { - virtuals.push_back({"_raw_record", std::make_shared(std::make_shared())}); - virtuals.push_back({"_error", std::make_shared(std::make_shared())}); - } - - 
return virtuals; -} - } diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index bdcf85ad6e5..91d58540c94 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -102,8 +102,6 @@ public: String getFullMetaPath(const String & file_name) const { return std::filesystem::path(metadata_base_path) / file_name; } String getFullDataPath(const String & file_name) const { return std::filesystem::path(root_data_path) / file_name; } - NamesAndTypesList getVirtuals() const override; - static UInt64 getInode(const String & file_name); void openFilesAndSetPos(); @@ -212,6 +210,8 @@ private: UInt64 inode = 0; }; ReadMetadataResult readMetadata(const String & filename) const; + + static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode); }; } diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index cd935fa3100..c574f57fc6a 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -241,8 +241,7 @@ StorageHDFS::StorageHDFS( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } namespace @@ -975,7 +974,7 @@ void StorageHDFS::read( size_t max_block_size, size_t num_streams) { - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_), virtual_columns); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && context_->getSettingsRef().optimize_count_from_files; @@ -1011,7 +1010,7 @@ void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate) else if (storage->is_path_with_globs) { /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->virtual_columns, context); + auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->getVirtualsList(), context); iterator_wrapper = std::make_shared([glob_iterator]() { return glob_iterator->next(); @@ -1019,7 +1018,7 @@ void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate) } else { - auto uris_iterator = std::make_shared(storage->uris, predicate, storage->virtual_columns, context); + auto uris_iterator = std::make_shared(storage->uris, predicate, storage->getVirtualsList(), context); iterator_wrapper = std::make_shared([uris_iterator]() { return uris_iterator->next(); @@ -1179,16 +1178,6 @@ void registerStorageHDFS(StorageFactory & factory) }); } -NamesAndTypesList StorageHDFS::getVirtuals() const -{ - return virtual_columns; -} - -Names StorageHDFS::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx) { static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_hdfs", DEFAULT_SCHEMA_CACHE_ELEMENTS)); diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index b36ff7ea37e..47e5addccb4 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ 
b/src/Storages/HDFS/StorageHDFS.h @@ -69,9 +69,6 @@ public: ContextPtr local_context, TableExclusiveLockHolder &) override; - NamesAndTypesList getVirtuals() const override; - static Names getVirtualColumnNames(); - bool supportsPartitionBy() const override { return true; } /// Check if the format is column-oriented. @@ -114,7 +111,6 @@ private: const bool distributed_processing; ASTPtr partition_by; bool is_path_with_globs; - NamesAndTypesList virtual_columns; LoggerPtr log = getLogger("StorageHDFS"); }; diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index 714d6391543..bde8b84e349 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -72,8 +72,7 @@ StorageHDFSCluster::StorageHDFSCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageHDFSCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) @@ -89,18 +88,11 @@ void StorageHDFSCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB: RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { - auto iterator = std::make_shared(uri, predicate, virtual_columns, context); + auto iterator = std::make_shared(uri, predicate, getVirtualsList(), context); auto callback = std::make_shared>([iter = std::move(iterator)]() mutable -> String { return iter->next().path; }); return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; } -NamesAndTypesList StorageHDFSCluster::getVirtuals() const -{ - return NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; -} - } #endif diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 40884f98984..26ebc8601ee 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -32,8 +32,6 @@ public: std::string getName() const override { return "HDFSCluster"; } - NamesAndTypesList getVirtuals() const override; - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; bool supportsSubcolumns() const override { return true; } @@ -45,7 +43,6 @@ private: String uri; String format_name; - NamesAndTypesList virtual_columns; }; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 183a4532281..88ab8e15e76 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -45,6 +45,7 @@ #include #include #include +#include namespace CurrentMetrics { @@ -444,6 +445,7 @@ StorageHive::StorageHive( storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext()); setInMemoryMetadata(storage_metadata); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageHive::lazyInitialize() @@ -1020,13 +1022,6 @@ SinkToStoragePtr StorageHive::write(const ASTPtr & /*query*/, const StorageMetad throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for 
StorageHive"); } -NamesAndTypesList StorageHive::getVirtuals() const -{ - return NamesAndTypesList{ - {"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}, - {"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}}; -} - std::optional<UInt64> StorageHive::totalRows(const Settings & settings) const { /// query_info is not used when prune_level == PruneLevel::None diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 07440097f7a..67ef153af0e 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -54,8 +54,6 @@ public: SinkToStoragePtr write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/, bool async_insert) override; - NamesAndTypesList getVirtuals() const override; - bool supportsSubsetOfColumns() const; std::optional<UInt64> totalRows(const Settings & settings) const override; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 85ef6a0bb35..9852220241f 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -27,10 +27,17 @@ namespace ErrorCodes extern const int CANNOT_RESTORE_TABLE; } +IStorage::IStorage(StorageID storage_id_) + : storage_id(std::move(storage_id_)) + , metadata(std::make_unique<StorageInMemoryMetadata>()) + , virtuals(std::make_unique<VirtualColumnsDescription>()) +{ +} + bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const { /// A virtual column may be overridden by a real column - return !metadata_snapshot->getColumns().has(column_name) && getVirtuals().contains(column_name); + return !metadata_snapshot->getColumns().has(column_name) && virtuals.get()->has(column_name); } RWLockImpl::LockHolder IStorage::tryLockTimed( @@ -237,11 +244,6 @@ void IStorage::renameInMemory(const StorageID & new_table_id) storage_id = new_table_id; } -NamesAndTypesList IStorage::getVirtuals() const -{ - return {}; -} - Names IStorage::getAllRegisteredNames() const { Names result; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4fa6bfdd617..1108eafc6b6 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -98,9 +99,7 @@ class IStorage : public std::enable_shared_from_this<IStorage>, public TypePromotion<IStorage> public: IStorage() = delete; /// Storage metadata can be set separately in setInMemoryMetadata method - explicit IStorage(StorageID storage_id_) : storage_id(std::move(storage_id_)) , metadata(std::make_unique<StorageInMemoryMetadata>()) {} + explicit IStorage(StorageID storage_id_); IStorage(const IStorage &) = delete; IStorage & operator=(const IStorage &) = delete; @@ -215,6 +214,10 @@ public: metadata.set(std::make_unique<StorageInMemoryMetadata>(metadata_)); } + void setVirtuals(VirtualColumnsDescription virtuals_) + { + virtuals.set(std::make_unique<VirtualColumnsDescription>(std::move(virtuals_))); + } /// Return list of virtual columns (like _part, _table, etc). In the vast /// majority of cases virtual columns are static constant part of Storage @@ -226,7 +229,9 @@ public: /// virtual column will be overridden and inaccessible. /// /// By default return empty list of columns.
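VirtualColumnsDescription and the MultiVersionVirtualsDescriptionPtr member are defined outside this diff. A minimal stand-in for the way IStorage uses them (register columns once, publish an immutable snapshot, read it without locks) might look like the sketch below; the names and layout are assumptions, not the real ClickHouse classes:

```cpp
#include <atomic>
#include <map>
#include <memory>
#include <string>

// Simplified stand-in for one virtual column description.
struct VirtualColumn
{
    std::string type_name;
    std::string comment;
};

// Simplified stand-in for VirtualColumnsDescription.
class VirtualsDescriptionStub
{
public:
    void addEphemeral(std::string name, std::string type_name, std::string comment)
    {
        columns.emplace(std::move(name), VirtualColumn{std::move(type_name), std::move(comment)});
    }

    bool has(const std::string & name) const { return columns.count(name) != 0; }

private:
    std::map<std::string, VirtualColumn> columns;
};

// Simplified multiversion holder: a writer publishes a fresh snapshot,
// readers load the current one without taking a lock.
class MultiVersionVirtuals
{
public:
    void set(std::unique_ptr<VirtualsDescriptionStub> v)
    {
        std::atomic_store(&current, std::shared_ptr<const VirtualsDescriptionStub>(std::move(v)));
    }

    std::shared_ptr<const VirtualsDescriptionStub> get() const
    {
        return std::atomic_load(&current);
    }

private:
    std::shared_ptr<const VirtualsDescriptionStub> current;
};
```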
- virtual NamesAndTypesList getVirtuals() const; + VirtualsDescriptionPtr getVirtualsPtr() const { return virtuals.get(); } + NamesAndTypesList getVirtualsList() const { return virtuals.get()->getNamesAndTypesList(); } + Block getVirtualsHeader() const { return virtuals.get()->getSampleBlock(); } Names getAllRegisteredNames() const override; @@ -263,15 +268,16 @@ public: virtual bool supportsTrivialCountOptimization() const { return false; } private: - StorageID storage_id; mutable std::mutex id_mutex; - /// Multiversion storage metadata. Allows to read/write storage metadata - /// without locks. + /// Multiversion storage metadata. Allows to read/write storage metadata without locks. MultiVersionStorageMetadataPtr metadata; + /// Description of virtual columns. Optional, may be set in constructor. + MultiVersionVirtualsDescriptionPtr virtuals; + protected: RWLockImpl::LockHolder tryLockTimed( const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const std::chrono::milliseconds & acquire_timeout) const; diff --git a/src/Storages/Kafka/KafkaSource.cpp b/src/Storages/Kafka/KafkaSource.cpp index dc62c13f633..9c68107872e 100644 --- a/src/Storages/Kafka/KafkaSource.cpp +++ b/src/Storages/Kafka/KafkaSource.cpp @@ -45,7 +45,7 @@ KafkaSource::KafkaSource( , max_block_size(max_block_size_) , commit_in_suffix(commit_in_suffix_) , non_virtual_header(storage_snapshot->metadata->getSampleBlockNonMaterialized()) - , virtual_header(storage_snapshot->getSampleBlockForColumns(storage.getVirtualColumnNames())) + , virtual_header(storage.getVirtualsHeader()) , handle_error_mode(storage.getStreamingHandleErrorMode()) { } diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 638f5fe2ef6..e41488189e9 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -363,6 +363,8 @@ StorageKafka::StorageKafka( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals(kafka_settings->kafka_handle_error_mode)); + auto task_count = thread_per_consumer ? 
num_consumers : 1; for (size_t i = 0; i < task_count; ++i) { @@ -384,6 +386,28 @@ StorageKafka::StorageKafka( }); } +VirtualColumnsDescription StorageKafka::createVirtuals(StreamingHandleErrorMode handle_error_mode) +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_topic", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), ""); + desc.addEphemeral("_key", std::make_shared<DataTypeString>(), ""); + desc.addEphemeral("_offset", std::make_shared<DataTypeUInt64>(), ""); + desc.addEphemeral("_partition", std::make_shared<DataTypeUInt64>(), ""); + desc.addEphemeral("_timestamp", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>()), ""); + desc.addEphemeral("_timestamp_ms", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime64>(3)), ""); + desc.addEphemeral("_headers.name", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), ""); + desc.addEphemeral("_headers.value", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), ""); + + if (handle_error_mode == StreamingHandleErrorMode::STREAM) + { + desc.addEphemeral("_raw_message", std::make_shared<DataTypeString>(), ""); + desc.addEphemeral("_error", std::make_shared<DataTypeString>(), ""); + } + + return desc; +} + SettingsChanges StorageKafka::createSettingsAdjustments() { SettingsChanges result; @@ -1194,43 +1218,4 @@ void registerStorageKafka(StorageFactory & factory) factory.registerStorage("Kafka", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); } -NamesAndTypesList StorageKafka::getVirtuals() const -{ - auto result = NamesAndTypesList{ - {"_topic", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}, - {"_key", std::make_shared<DataTypeString>()}, - {"_offset", std::make_shared<DataTypeUInt64>()}, - {"_partition", std::make_shared<DataTypeUInt64>()}, - {"_timestamp", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>())}, - {"_timestamp_ms", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime64>(3))}, - {"_headers.name", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, - {"_headers.value", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}}; - if (kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM) - { - result.push_back({"_raw_message", std::make_shared<DataTypeString>()}); - result.push_back({"_error", std::make_shared<DataTypeString>()}); - } - return result; -} - -Names StorageKafka::getVirtualColumnNames() const -{ - auto result = Names { - "_topic", - "_key", - "_offset", - "_partition", - "_timestamp", - "_timestamp_ms", - "_headers.name", - "_headers.value", - }; - if (kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM) - { - result.push_back({"_raw_message"}); - result.push_back({"_error"}); - } - return result; -} - } diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index d5e319b8974..3b20e6b23f6 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -74,8 +74,6 @@ public: const auto & getFormatName() const { return format_name; } - NamesAndTypesList getVirtuals() const override; - Names getVirtualColumnNames() const; StreamingHandleErrorMode getStreamingHandleErrorMode() const { return kafka_settings->kafka_handle_error_mode; } struct SafeConsumers @@ -158,6 +156,8 @@ private: bool checkDependencies(const StorageID & table_id); void cleanConsumers(); + + static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode); }; } diff --git a/src/Storages/LightweightDeleteDescription.cpp b/src/Storages/LightweightDeleteDescription.cpp deleted file mode 100644 index ae5e68da9c2..00000000000 --- a/src/Storages/LightweightDeleteDescription.cpp +++ /dev/null @@ -1,9 +0,0 @@ -#include <Storages/LightweightDeleteDescription.h> -#include <DataTypes/DataTypesNumber.h> - -namespace DB -{ - -const NameAndTypePair LightweightDeleteDescription::FILTER_COLUMN {"_row_exists", std::make_shared<DataTypeUInt8>()}; - -} diff --git
a/src/Storages/LightweightDeleteDescription.h b/src/Storages/LightweightDeleteDescription.h deleted file mode 100644 index 45bde59ea71..00000000000 --- a/src/Storages/LightweightDeleteDescription.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once -#include <Core/NamesAndTypes.h> -#include "Storages/TTLDescription.h" - -namespace DB -{ - -struct LightweightDeleteDescription -{ - static const NameAndTypePair FILTER_COLUMN; -}; - -} diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 958e0a326cf..c3aacfd67d3 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -218,6 +218,10 @@ StorageLiveView::StorageLiveView( setInMemoryMetadata(storage_metadata); + VirtualColumnsDescription virtuals; + virtuals.addEphemeral("_version", std::make_shared<DataTypeUInt64>(), ""); + setVirtuals(std::move(virtuals)); + if (!query.select) throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); @@ -236,13 +240,6 @@ StorageLiveView::~StorageLiveView() shutdown(false); } -NamesAndTypesList StorageLiveView::getVirtuals() const -{ - return NamesAndTypesList{ - NameAndTypePair("_version", std::make_shared<DataTypeUInt64>()) - }; -} - void StorageLiveView::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const { auto table_id = getStorageID(); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index bf6b13fc837..91daac32c7b 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -74,8 +74,6 @@ public: bool supportsFinal() const override { return true; } - NamesAndTypesList getVirtuals() const override; - void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override; void drop() override; diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index c6407a99a4e..ff9941ee808 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -623,6 +623,15 @@ void DataPartStorageOnDiskBase::remove( } } + if (!disk->exists(from)) + { + LOG_ERROR(log, "Directory {} (part to remove) doesn't exist or one of the nested files is gone. Most likely this is due to manual removal, which should be discouraged. Ignoring.", fullPath(disk, from)); + /// We will never touch this part again, so unlock it from zero-copy + if (!can_remove_description) + can_remove_description.emplace(can_remove_callback()); + return; + } + try { disk->moveDirectory(from, to); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index a9bdceacef0..526ea542b0e 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1451,6 +1451,11 @@ bool IMergeTreeDataPart::supportLightweightDeleteMutate() const parent_part == nullptr && projection_parts.empty(); } +bool IMergeTreeDataPart::hasLightweightDelete() const +{ + return columns.contains(RowExistsColumn::name); +} + void IMergeTreeDataPart::assertHasVersionMetadata(MergeTreeTransaction * txn) const { TransactionID expected_tid = txn ?
txn->tid : Tx::PrehistoricTID; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 91c559d30c8..fba1e6ddbb1 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -1,12 +1,12 @@ #pragma once +#include #include #include #include #include #include #include -#include #include #include #include @@ -48,6 +48,8 @@ class MarkCache; class UncompressedCache; class MergeTreeTransaction; +struct MergeTreeReadTaskInfo; +using MergeTreeReadTaskInfoPtr = std::shared_ptr; enum class DataPartRemovalState { @@ -69,6 +71,7 @@ public: using Checksums = MergeTreeDataPartChecksums; using Checksum = MergeTreeDataPartChecksums::Checksum; using ValueSizeMap = std::map; + using VirtualFields = std::unordered_map; using MergeTreeReaderPtr = std::unique_ptr; using MergeTreeWriterPtr = std::unique_ptr; @@ -95,6 +98,7 @@ public: const NamesAndTypesList & columns_, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, @@ -493,7 +497,7 @@ public: bool supportLightweightDeleteMutate() const; /// True if here is lightweight deleted mask file in part. - bool hasLightweightDelete() const { return columns.contains(LightweightDeleteDescription::FILTER_COLUMN.name); } + bool hasLightweightDelete() const; void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings); diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 63ed8021f58..4936f1d33c6 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -1,7 +1,8 @@ #include +#include +#include +#include #include -#include -#include #include #include #include @@ -19,12 +20,13 @@ namespace namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } - IMergeTreeReader::IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -47,6 +49,7 @@ IMergeTreeReader::IMergeTreeReader( , part_columns(data_part_info_for_read->isWidePart() ? 
data_part_info_for_read->getColumnsDescriptionWithCollectedNested() : data_part_info_for_read->getColumnsDescription()) + , virtual_fields(virtual_fields_) { columns_to_read.reserve(requested_columns.size()); serializations.reserve(requested_columns.size()); @@ -63,7 +66,49 @@ const IMergeTreeReader::ValueSizeMap & IMergeTreeReader::getAvgValueSizeHints() return avg_value_size_hints; } -void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows, size_t block_number) const +void IMergeTreeReader::fillVirtualColumns(Columns & columns, size_t rows) const +{ + chassert(columns.size() == requested_columns.size()); + + const auto * loaded_part_info = typeid_cast<const LoadedMergeTreeDataPartInfoForReader *>(data_part_info_for_read.get()); + if (!loaded_part_info) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Filling of virtual columns is supported only for LoadedMergeTreeDataPartInfoForReader"); + + const auto & data_part = loaded_part_info->getDataPart(); + const auto & storage_columns = storage_snapshot->getMetadataForQuery()->getColumns(); + const auto & virtual_columns = storage_snapshot->virtual_columns; + + auto it = requested_columns.begin(); + for (size_t pos = 0; pos < columns.size(); ++pos, ++it) + { + if (columns[pos] || storage_columns.has(it->name)) + continue; + + auto virtual_column = virtual_columns->tryGet(it->name); + if (!virtual_column) + continue; + + if (!it->type->equals(*virtual_column->type)) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Data type mismatch for virtual column {}. Requested type: {}, virtual column type: {}", + it->name, it->type->getName(), virtual_column->type->getName()); + } + + if (it->name == "_part_offset") + throw Exception(ErrorCodes::LOGICAL_ERROR, "Virtual column {} must be filled by range reader", it->name); + + Field field; + if (auto field_it = virtual_fields.find(it->name); field_it != virtual_fields.end()) + field = field_it->second; + else + field = getFieldForConstVirtualColumn(it->name, *data_part); + + columns[pos] = virtual_column->type->createColumnConst(rows, field)->convertToFullColumnIfConst(); + } +} + +void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows) const { try { @@ -72,7 +117,7 @@ void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_e res_columns, num_rows, Nested::convertToSubcolumns(requested_columns), Nested::convertToSubcolumns(available_columns), - partially_read_columns, storage_snapshot->metadata, block_number); + partially_read_columns, storage_snapshot->metadata); should_evaluate_missing_defaults = std::any_of( res_columns.begin(), res_columns.end(), [](const auto & column) { return column == nullptr; }); diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 997be064f28..a5b84eba241 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -9,8 +9,6 @@ namespace DB { -class IDataType; - /// Reads the data between pairs of marks in the same part. When reading consecutive ranges, avoids unnecessary seeks. /// When ranges are almost consecutive, seeks are fast because they are performed inside the buffer. /// Avoids loading the marks file if it is not needed (e.g. when reading the whole part).
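fillVirtualColumns() above resolves the value of each constant virtual column in two steps: prefer what an earlier stage stored in virtual_fields, otherwise derive it from the data part, and then expand the chosen constant to a full column of rows copies. A simplified model of that logic, with Field and the column reduced to standard containers purely for illustration:

```cpp
#include <string>
#include <unordered_map>
#include <vector>

using Field = std::string;          // stand-in for DB::Field
using Column = std::vector<Field>;  // stand-in for a full column

// Pick the value: prefer what a previous stage computed (e.g. the read pool),
// else fall back to the per-part constant.
Field valueForVirtual(
    const std::string & name,
    const std::unordered_map<std::string, Field> & virtual_fields,
    const Field & value_from_part)
{
    if (auto it = virtual_fields.find(name); it != virtual_fields.end())
        return it->second;
    return value_from_part; // what getFieldForConstVirtualColumn(name, part) would return
}

// Mirror createColumnConst(rows, field)->convertToFullColumnIfConst():
// a constant expanded into `rows` identical values.
Column materializeConst(const Field & value, size_t rows)
{
    return Column(rows, value);
}
```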
@@ -18,11 +16,13 @@ class IMergeTreeReader : private boost::noncopyable { public: using ValueSizeMap = std::map; + using VirtualFields = std::unordered_map; using DeserializeBinaryBulkStateMap = std::map; IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -42,10 +42,13 @@ public: const ValueSizeMap & getAvgValueSizeHints() const; + /// Add virtual columns that are not present in the block. + void fillVirtualColumns(Columns & columns, size_t rows) const; + /// Add columns from ordered_names that are not present in the block. /// Missing columns are added in the order specified by ordered_names. /// num_rows is needed in case if all res_columns are nullptr. - void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows, size_t block_number = 0) const; + void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows) const; /// Evaluate defaulted columns if necessary. void evaluateMissingDefaults(Block additional_columns, Columns & res_columns) const; @@ -113,6 +116,9 @@ private: /// Actual columns description in part. const ColumnsDescription & part_columns; + + /// Fields of virtual columns that were filled in previous stages. + VirtualFields virtual_fields; }; } diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index df64ae33713..aa38198334e 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -1075,14 +1074,18 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() if (global_ctx->deduplicate) { - /// We don't want to deduplicate by block number column - /// so if deduplicate_by_columns is empty, add all columns except _block_number - if (supportsBlockNumberColumn(global_ctx) && global_ctx->deduplicate_by_columns.empty()) + const auto & virtuals = *global_ctx->data->getVirtualsPtr(); + + /// We don't want to deduplicate by virtual persistent column. + /// If deduplicate_by_columns is empty, add all columns except virtuals. 
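Taken in isolation, the rule the loop below implements is: keep every merged column that the storage does not register as a persistent virtual, so columns such as _row_exists and _block_number never participate in deduplication. A self-contained sketch, where isPersistentVirtual stands in for virtuals.tryGet(name, VirtualsKind::Persistent):

```cpp
#include <functional>
#include <string>
#include <vector>

// Keep all merged columns except persistent virtuals; those must not
// influence which rows are considered duplicates.
std::vector<std::string> deduplicateByColumns(
    const std::vector<std::string> & merging_columns,
    const std::function<bool(const std::string &)> & isPersistentVirtual)
{
    std::vector<std::string> result;
    for (const auto & name : merging_columns)
        if (!isPersistentVirtual(name))
            result.push_back(name);
    return result;
}
```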
+ if (global_ctx->deduplicate_by_columns.empty()) { - for (const auto & col : global_ctx->merging_column_names) + for (const auto & column_name : global_ctx->merging_column_names) { - if (col != BlockNumberColumn::name) - global_ctx->deduplicate_by_columns.emplace_back(col); + if (virtuals.tryGet(column_name, VirtualsKind::Persistent)) + continue; + + global_ctx->deduplicate_by_columns.emplace_back(column_name); } } diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 7fb4797e482..1f50e55f8a0 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -15,7 +15,7 @@ #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index f5f0fa6f726..67f5e7a53e8 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -106,16 +107,14 @@ NameSet injectRequiredColumns( auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical) .withExtendedObjects() - .withSystemColumns(); - - if (with_subcolumns) - options.withSubcolumns(); + .withVirtuals() + .withSubcolumns(with_subcolumns); for (size_t i = 0; i < columns.size(); ++i) { - /// We are going to fetch only physical columns and system columns + /// We are going to fetch physical columns and system columns first if (!storage_snapshot->tryGetColumn(options, columns[i])) - throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no physical column or subcolumn {} in table", columns[i]); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column or subcolumn {} in table", columns[i]); have_at_least_one_physical_column |= injectRequiredColumnsRecursively( columns[i], storage_snapshot, alter_conversions, @@ -258,11 +257,10 @@ void MergeTreeBlockSizePredictor::update(const Block & sample_block, const Colum } -MergeTreeReadTask::Columns getReadTaskColumns( +MergeTreeReadTaskColumns getReadTaskColumns( const IMergeTreeDataPartInfoForReader & data_part_info_for_reader, const StorageSnapshotPtr & storage_snapshot, const Names & required_columns, - const Names & system_columns, const PrewhereInfoPtr & prewhere_info, const ExpressionActionsSettings & actions_settings, const MergeTreeReaderSettings & reader_settings, @@ -270,28 +268,30 @@ MergeTreeReadTask::Columns getReadTaskColumns( { Names column_to_read_after_prewhere = required_columns; - /// Read system columns such as lightweight delete mask "_row_exists" if it is persisted in the part - for (const auto & name : system_columns) - if (data_part_info_for_reader.getColumns().contains(name)) - column_to_read_after_prewhere.push_back(name); - /// Inject columns required for defaults evaluation injectRequiredColumns( data_part_info_for_reader, storage_snapshot, with_subcolumns, column_to_read_after_prewhere); - MergeTreeReadTask::Columns result; + MergeTreeReadTaskColumns result; auto options = GetColumnsOptions(GetColumnsOptions::All) .withExtendedObjects() - .withSystemColumns(); + .withVirtuals() + .withSubcolumns(with_subcolumns); - if (with_subcolumns) - options.withSubcolumns(); + static const NameSet columns_to_read_at_first_step = {"_part_offset"}; NameSet columns_from_previous_steps; auto add_step = [&](const PrewhereExprStep & step) { Names step_column_names; + if (columns_from_previous_steps.empty()) + { + for (const auto & required_column : 
required_columns) + if (columns_to_read_at_first_step.contains(required_column)) + step_column_names.push_back(required_column); + } + /// Computation results from previous steps might be used in the current step as well. In such a case these /// computed columns will be present in the current step inputs. They don't need to be read from the disk so /// exclude them from the list of columns to read. This filtering must be done before injecting required diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index 9417d47814a..b19c42c8db8 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include @@ -22,11 +21,10 @@ NameSet injectRequiredColumns( bool with_subcolumns, Names & columns); -MergeTreeReadTask::Columns getReadTaskColumns( +MergeTreeReadTaskColumns getReadTaskColumns( const IMergeTreeDataPartInfoForReader & data_part_info_for_reader, const StorageSnapshotPtr & storage_snapshot, const Names & required_columns, - const Names & system_columns, const PrewhereInfoPtr & prewhere_info, const ExpressionActionsSettings & actions_settings, const MergeTreeReaderSettings & reader_settings, diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8aa188cfe5c..9bc360cbcac 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -67,7 +67,7 @@ #include #include #include -#include +#include #include #include #include @@ -430,6 +430,29 @@ MergeTreeData::MergeTreeData( }; } +VirtualColumnsDescription MergeTreeData::createVirtuals(const StorageInMemoryMetadata & metadata) +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_part", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Name of part"); + desc.addEphemeral("_part_index", std::make_shared<DataTypeUInt64>(), "Sequential index of the part in the query result"); + desc.addEphemeral("_part_uuid", std::make_shared<DataTypeUUID>(), "Unique part identifier (if the MergeTree setting assign_part_uuids is enabled)"); + desc.addEphemeral("_partition_id", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Name of partition"); + desc.addEphemeral("_sample_factor", std::make_shared<DataTypeFloat64>(), "Sample factor (from the query)"); + desc.addEphemeral("_part_offset", std::make_shared<DataTypeUInt64>(), "Number of the row in the part"); + + if (metadata.hasPartitionKey()) + { + auto partition_types = metadata.partition_key.sample_block.getDataTypes(); + desc.addEphemeral("_partition_value", std::make_shared<DataTypeTuple>(std::move(partition_types)), "Value (a tuple) of a PARTITION BY expression"); + } + + desc.addPersistent(RowExistsColumn::name, RowExistsColumn::type, nullptr, "Persisted mask created by lightweight delete that shows whether the row exists or is deleted"); + desc.addPersistent(BlockNumberColumn::name, BlockNumberColumn::type, BlockNumberColumn::codec, "Persisted original block number that was assigned at insert"); + + return desc; +} + StoragePolicyPtr MergeTreeData::getStoragePolicy() const { auto settings = getSettings(); @@ -677,6 +700,7 @@ void MergeTreeData::setProperties( { checkProperties(new_metadata, old_metadata, attach, false, allow_nullable_key, local_context); setInMemoryMetadata(new_metadata); + setVirtuals(createVirtuals(new_metadata)); } namespace @@ -1002,73 +1026,38 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat /// TODO Checks for Graphite mode.
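RowExistsColumn and BlockNumberColumn are declared in a header that is not part of this excerpt; judging by the call sites they bundle at least a column name, a type, and (for _block_number) a compression codec. A hypothetical stand-in, only to make the shape of these constants concrete:

```cpp
#include <memory>
#include <string>

// Hypothetical stand-ins for the constants referenced above; the real
// definitions live in a header outside this diff.
struct TypeStub { std::string name; };
using TypePtr = std::shared_ptr<const TypeStub>;

struct RowExistsColumnStub
{
    static inline const std::string name = "_row_exists";
    static inline const TypePtr type = std::make_shared<TypeStub>(TypeStub{"UInt8"});
};

struct BlockNumberColumnStub
{
    static inline const std::string name = "_block_number";
    static inline const TypePtr type = std::make_shared<TypeStub>(TypeStub{"UInt64"});
    // The real struct also carries a compression codec for persisted data.
};
```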
} +const Names MergeTreeData::virtuals_useful_for_filter = {"_part", "_partition_id", "_part_uuid", "_partition_value"}; -DataTypePtr MergeTreeData::getPartitionValueType() const +Block MergeTreeData::getHeaderWithVirtualsForFilter() const { - DataTypePtr partition_value_type; - auto partition_types = getInMemoryMetadataPtr()->partition_key.sample_block.getDataTypes(); - if (partition_types.empty()) - partition_value_type = std::make_shared(); - else - partition_value_type = std::make_shared(std::move(partition_types)); - return partition_value_type; + Block header; + auto virtuals_desc = getVirtualsPtr(); + for (const auto & name : virtuals_useful_for_filter) + if (auto column = virtuals_desc->tryGet(name)) + header.insert({column->type->createColumn(), column->type, name}); + return header; } - -Block MergeTreeData::getSampleBlockWithVirtualColumns() const +Block MergeTreeData::getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty) const { - DataTypePtr partition_value_type = getPartitionValueType(); - return { - ColumnWithTypeAndName( - DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_part"), - ColumnWithTypeAndName( - DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_partition_id"), - ColumnWithTypeAndName(ColumnUUID::create(), std::make_shared(), "_part_uuid"), - ColumnWithTypeAndName(partition_value_type->createColumn(), partition_value_type, "_partition_value")}; -} + auto block = getHeaderWithVirtualsForFilter(); - -Block MergeTreeData::getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part, bool ignore_empty) const -{ - auto block = getSampleBlockWithVirtualColumns(); - MutableColumns columns = block.mutateColumns(); - - auto & part_column = columns[0]; - auto & partition_id_column = columns[1]; - auto & part_uuid_column = columns[2]; - auto & partition_value_column = columns[3]; - - bool has_partition_value = typeid_cast(partition_value_column.get()); for (const auto & part_or_projection : parts) { if (ignore_empty && part_or_projection->isEmpty()) continue; - const auto * part = part_or_projection->isProjectionPart() ? part_or_projection->getParentPart() : part_or_projection.get(); - part_column->insert(part->name); - partition_id_column->insert(part->info.partition_id); - part_uuid_column->insert(part->uuid); - Tuple tuple(part->partition.value.begin(), part->partition.value.end()); - if (has_partition_value) - partition_value_column->insert(tuple); - if (one_part) + const auto * part = part_or_projection->isProjectionPart() + ? 
part_or_projection->getParentPart() + : part_or_projection.get(); + + for (auto & column : block) { - part_column = ColumnConst::create(std::move(part_column), 1); - partition_id_column = ColumnConst::create(std::move(partition_id_column), 1); - part_uuid_column = ColumnConst::create(std::move(part_uuid_column), 1); - if (has_partition_value) - partition_value_column = ColumnConst::create(std::move(partition_value_column), 1); - break; + auto field = getFieldForConstVirtualColumn(column.name, *part); + column.column->assumeMutableRef().insert(field); } } - block.setColumns(std::move(columns)); - if (!has_partition_value) - block.erase("_partition_value"); return block; } @@ -1077,13 +1066,14 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( const ActionsDAGPtr & filter_actions_dag, ContextPtr local_context, const DataPartsVector & parts) const { if (parts.empty()) - return 0u; + return 0; + auto metadata_snapshot = getInMemoryMetadataPtr(); - Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, true /* one_part */); + auto virtual_columns_block = getBlockWithVirtualsForFilter({parts[0]}); auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr); - // Generate valid expressions for filtering + /// Generate valid expressions for filtering bool valid = true; for (const auto * input : filter_dag->getInputs()) if (!virtual_columns_block.has(input->result_name)) @@ -1096,7 +1086,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( std::unordered_set part_values; if (valid) { - virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */); + virtual_columns_block = getBlockWithVirtualsForFilter(parts); VirtualColumnUtils::filterBlockWithDAG(filter_dag, virtual_columns_block, local_context); part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) @@ -3658,6 +3648,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts { auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & columns = metadata_snapshot->getColumns(); + auto virtuals = getVirtualsPtr(); if (!hasDynamicSubcolumns(columns)) return; @@ -3665,7 +3656,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts const auto & part_columns = part->getColumns(); for (const auto & part_column : part_columns) { - if (part_column.name == LightweightDeleteDescription::FILTER_COLUMN.name || part_column.name == BlockNumberColumn::name) + if (virtuals->has(part_column.name)) continue; auto storage_column = columns.getPhysical(part_column.name); @@ -6669,14 +6660,6 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( const auto & primary_key_max_column_name = metadata_snapshot->minmax_count_projection->primary_key_max_column_name; NameSet required_columns_set(required_columns.begin(), required_columns.end()); - if (required_columns_set.contains("_partition_value") && !typeid_cast(getPartitionValueType().get())) - { - throw Exception( - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, - "Missing column `_partition_value` because there is no partition column in table {}", - getStorageID().getTableName()); - } - if (!primary_key_max_column_name.empty()) need_primary_key_max_column = required_columns_set.contains(primary_key_max_column_name); @@ -6702,11 +6685,11 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( }; Block virtual_columns_block; - auto virtual_block = getSampleBlockWithVirtualColumns(); + 
auto virtual_block = getHeaderWithVirtualsForFilter(); bool has_virtual_column = std::any_of(required_columns.begin(), required_columns.end(), [&](const auto & name) { return virtual_block.has(name); }); if (has_virtual_column || filter_dag) { - virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */, true /* ignore_empty */); + virtual_columns_block = getBlockWithVirtualsForFilter(parts, /*ignore_empty=*/ true); if (virtual_columns_block.rows() == 0) return {}; } @@ -7952,21 +7935,6 @@ AlterConversionsPtr MergeTreeData::getAlterConversionsForPart(MergeTreeDataPartP return result; } -NamesAndTypesList MergeTreeData::getVirtuals() const -{ - return NamesAndTypesList{ - NameAndTypePair("_part", std::make_shared(std::make_shared())), - NameAndTypePair("_part_index", std::make_shared()), - NameAndTypePair("_part_uuid", std::make_shared()), - NameAndTypePair("_partition_id", std::make_shared(std::make_shared())), - NameAndTypePair("_partition_value", getPartitionValueType()), - NameAndTypePair("_sample_factor", std::make_shared()), - NameAndTypePair("_part_offset", std::make_shared()), - LightweightDeleteDescription::FILTER_COLUMN, - NameAndTypePair(BlockNumberColumn::name, BlockNumberColumn::type), - }; -} - size_t MergeTreeData::getTotalMergesWithTTLInMergeList() const { return getContext()->getMergeList().getMergesWithTTLCount(); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 4475f2b6f12..dc84505f38f 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -444,8 +444,6 @@ public: bool supportsTrivialCountOptimization() const override { return !hasLightweightDeletedMask(); } - NamesAndTypesList getVirtuals() const override; - /// Snapshot for MergeTree contains the current set of data parts /// at the moment of the start of query. struct SnapshotData : public StorageSnapshot::Data @@ -988,15 +986,13 @@ public: void removeQueryId(const String & query_id) const; void removeQueryIdNoLock(const String & query_id) const TSA_REQUIRES(query_id_set_mutex); - /// Return the partition expression types as a Tuple type. Return DataTypeUInt8 if partition expression is empty. - DataTypePtr getPartitionValueType() const; + static const Names virtuals_useful_for_filter; /// Construct a sample block of virtual columns. - Block getSampleBlockWithVirtualColumns() const; + Block getHeaderWithVirtualsForFilter() const; /// Construct a block consisting only of possible virtual columns for part pruning. - /// If one_part is true, fill in at most one part. - Block getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part, bool ignore_empty = false) const; + Block getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty = false) const; /// In merge tree we do inserts with several steps. One of them: /// X. 
write part to temporary directory with some temp name @@ -1087,6 +1083,8 @@ public: bool initializeDiskOnConfigChange(const std::set & /*new_added_disks*/) override; + static VirtualColumnsDescription createVirtuals(const StorageInMemoryMetadata & metadata); + protected: friend class IMergeTreeDataPart; friend class MergeTreeDataMergerMutator; diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 0ecd7abe183..9f201ab3b81 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB @@ -33,6 +32,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( const NamesAndTypesList & columns_to_read, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, @@ -41,12 +41,21 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( const ReadBufferFromFileBase::ProfileCallback & profile_callback) const { auto read_info = std::make_shared(shared_from_this(), alter_conversions); - auto * load_marks_threadpool = reader_settings.read_settings.load_marks_asynchronously ? &read_info->getContext()->getLoadMarksThreadpool() : nullptr; + auto * load_marks_threadpool + = reader_settings.read_settings.load_marks_asynchronously ? &read_info->getContext()->getLoadMarksThreadpool() : nullptr; return std::make_unique( - read_info, columns_to_read, storage_snapshot, uncompressed_cache, - mark_cache, mark_ranges, reader_settings, load_marks_threadpool, - avg_value_size_hints, profile_callback); + read_info, + columns_to_read, + virtual_fields, + storage_snapshot, + uncompressed_cache, + mark_cache, + mark_ranges, + reader_settings, + load_marks_threadpool, + avg_value_size_hints, + profile_callback); } IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( @@ -66,12 +75,6 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( ordered_columns_list.sort([this](const auto & lhs, const auto & rhs) { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); - /// _block_number column is not added by user, but is persisted in a part after merge - /// If _block_number is not present in the parts to be merged, then it won't have a position - /// So check if its not present and add it at the end - if (columns_list.contains(BlockNumberColumn::name) && !ordered_columns_list.contains(BlockNumberColumn::name)) - ordered_columns_list.emplace_back(NameAndTypePair{BlockNumberColumn::name, BlockNumberColumn::type}); - return std::make_unique( shared_from_this(), ordered_columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, getMarksFileExtension(), diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 35a358b3720..a97d15a08f3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -32,6 +32,7 @@ public: const NamesAndTypesList & columns, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, diff --git 
a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 2f01dbfe04b..e023ae9be0e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -33,6 +33,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( const NamesAndTypesList & columns_to_read, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const VirtualFields & virtual_fields, UncompressedCache * /* uncompressed_cache */, MarkCache * /* mark_cache */, const AlterConversionsPtr & alter_conversions, @@ -44,7 +45,13 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( auto ptr = std::static_pointer_cast(shared_from_this()); return std::make_unique( - read_info, ptr, columns_to_read, storage_snapshot, mark_ranges, reader_settings); + read_info, + ptr, + columns_to_read, + virtual_fields, + storage_snapshot, + mark_ranges, + reader_settings); } IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter( diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index 27f8ba4bccb..90b4b0e3471 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -21,6 +21,7 @@ public: const NamesAndTypesList & columns, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index dc6c1f0019d..018b8a35534 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -31,6 +31,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( const NamesAndTypesList & columns_to_read, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, @@ -40,10 +41,16 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( { auto read_info = std::make_shared(shared_from_this(), alter_conversions); return std::make_unique( - read_info, columns_to_read, - storage_snapshot, uncompressed_cache, - mark_cache, mark_ranges, reader_settings, - avg_value_size_hints, profile_callback); + read_info, + columns_to_read, + virtual_fields, + storage_snapshot, + uncompressed_cache, + mark_cache, + mark_ranges, + reader_settings, + avg_value_size_hints, + profile_callback); } IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter( diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 14147c4ad56..a8710dad679 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -27,6 +27,7 @@ public: const NamesAndTypesList & columns, const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, + const VirtualFields & virtual_fields, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, const AlterConversionsPtr & alter_conversions, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp 
b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index b05b4584259..1721fd15b8d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -1,12 +1,9 @@ #include #include -#include namespace DB { - CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); - namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -55,14 +52,10 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( marks_source_hashing = std::make_unique<HashingWriteBuffer>(*marks_compressor); } - const auto & storage_columns = metadata_snapshot->getColumns(); + auto storage_snapshot = std::make_shared<StorageSnapshot>(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { - ASTPtr compression; - if (column.name == BlockNumberColumn::name) - compression = BlockNumberColumn::compression_codec->getFullCodecDesc(); - else - compression = storage_columns.getCodecDescOrDefault(column.name, default_codec); + auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); addStreams(column, compression); } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 9d373504473..d79590ded21 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -6,12 +6,10 @@ #include #include #include -#include #include namespace DB { - CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); namespace ErrorCodes { @@ -91,15 +89,11 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( indices_to_recalc_, stats_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_) { - const auto & columns = metadata_snapshot->getColumns(); - for (const auto & it : columns_list) + auto storage_snapshot = std::make_shared<StorageSnapshot>(data_part->storage, metadata_snapshot); + for (const auto & column : columns_list) { - ASTPtr compression; - if (it.name == BlockNumberColumn::name) - compression = BlockNumberColumn::compression_codec->getFullCodecDesc(); - else - compression = columns.getCodecDescOrDefault(it.name, default_codec); - addStreams(it, compression); + auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); + addStreams(column, compression); } } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 00aeac9cef4..428c8f92931 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -46,7 +46,6 @@ #include #include -#include #include namespace CurrentMetrics @@ -69,7 +68,6 @@ namespace ErrorCodes extern const int CANNOT_PARSE_TEXT; extern const int TOO_MANY_PARTITIONS; extern const int DUPLICATED_PART_UUIDS; - extern const int NO_SUCH_COLUMN_IN_TABLE; } @@ -166,7 +164,6 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( const MergeTreeData & data, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - bool sample_factor_column_queried, LoggerPtr log) { const Settings & settings = context->getSettingsRef(); @@ -296,7 +293,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( if (sampling.use_sampling) { - if (sample_factor_column_queried && relative_sample_size != RelativeSize(0)) + if (relative_sample_size != RelativeSize(0)) sampling.used_sample_factor = 1.0 / boost::rational_cast<Float64>(relative_sample_size);
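The writer hunks above replace the _block_number special case with a single lookup through the storage snapshot. Assuming getCodecDescOrDefault resolves roughly in the order "explicit codec of a real column, then codec of a persistent virtual, then the default codec", a simplified model of that resolution is:

```cpp
#include <map>
#include <string>

// Simplified model of the unified codec lookup; the maps stand in for the
// columns description and the virtuals description consulted by the snapshot.
std::string codecDescOrDefault(
    const std::map<std::string, std::string> & column_codecs,   // explicit per-column codecs
    const std::map<std::string, std::string> & virtual_codecs,  // codecs of persistent virtuals
    const std::string & column,
    const std::string & default_codec)
{
    if (auto it = column_codecs.find(column); it != column_codecs.end())
        return it->second;
    if (auto it = virtual_codecs.find(column); it != virtual_codecs.end())
        return it->second;
    return default_codec;
}
```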
RelativeSize size_of_universum = 0; @@ -483,12 +480,13 @@ std::optional> MergeTreeDataSelectExecutor::filterPar { if (!filter_dag) return {}; - auto sample = data.getSampleBlockWithVirtualColumns(); + + auto sample = data.getHeaderWithVirtualsForFilter(); auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), &sample); if (!dag) return {}; - auto virtual_columns_block = data.getBlockWithVirtualPartColumns(parts, false /* one_part */); + auto virtual_columns_block = data.getBlockWithVirtualsForFilter(parts); VirtualColumnUtils::filterBlockWithDAG(dag, virtual_columns_block, context); return VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); } @@ -868,69 +866,6 @@ std::shared_ptr MergeTreeDataSelectExecutor::checkLimits( return nullptr; } -static void selectColumnNames( - const Names & column_names_to_return, - const MergeTreeData & data, - Names & real_column_names, - Names & virt_column_names, - bool & sample_factor_column_queried) -{ - sample_factor_column_queried = false; - - for (const String & name : column_names_to_return) - { - if (name == "_part") - { - virt_column_names.push_back(name); - } - else if (name == "_part_index") - { - virt_column_names.push_back(name); - } - else if (name == "_partition_id") - { - virt_column_names.push_back(name); - } - else if (name == "_part_offset") - { - virt_column_names.push_back(name); - } - else if (name == LightweightDeleteDescription::FILTER_COLUMN.name) - { - virt_column_names.push_back(name); - } - else if (name == BlockNumberColumn::name) - { - virt_column_names.push_back(name); - } - else if (name == "_part_uuid") - { - virt_column_names.push_back(name); - } - else if (name == "_partition_value") - { - if (!typeid_cast(data.getPartitionValueType().get())) - { - throw Exception( - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, - "Missing column `_partition_value` because there is no partition column in table {}", - data.getStorageID().getTableName()); - } - - virt_column_names.push_back(name); - } - else if (name == "_sample_factor") - { - sample_factor_column_queried = true; - virt_column_names.push_back(name); - } - else - { - real_column_names.push_back(name); - } - } -} - ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMarksToRead( MergeTreeData::DataPartsVector parts, const Names & column_names_to_return, @@ -944,14 +879,6 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar if (total_parts == 0) return std::make_shared(); - Names real_column_names; - Names virt_column_names; - /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it. - /// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query. - bool sample_factor_column_queried = false; - - selectColumnNames(column_names_to_return, data, real_column_names, virt_column_names, sample_factor_column_queried); - std::optional indexes; /// NOTE: We don't need alter_conversions because the returned analysis_result is only used for: /// 1. estimate the number of rows to read; 2. projection reading, which doesn't have alter_conversions. 
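filterPartsByVirtualColumns now proceeds in three steps: build a header of the virtuals usable for filtering, split the filter DAG down to the inputs that header provides, then evaluate the split filter over one row per part and collect the surviving _part names. With the DAG reduced to a plain predicate, the last step amounts to:

```cpp
#include <functional>
#include <set>
#include <string>
#include <vector>

// One row of virtual values per part; the predicate stands in for the
// split filter DAG evaluated over the virtual-columns block.
struct PartRow
{
    std::string part_name;     // _part
    std::string partition_id;  // _partition_id
};

std::set<std::string> filterParts(
    const std::vector<PartRow> & rows,
    const std::function<bool(const PartRow &)> & predicate)
{
    std::set<std::string> result;
    for (const auto & row : rows)
        if (predicate(row))
            result.insert(row.part_name);
    return result;
}
```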
@@ -964,8 +891,7 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar num_streams, max_block_numbers_to_read, data, - real_column_names, - sample_factor_column_queried, + column_names_to_return, log, indexes); } @@ -992,27 +918,16 @@ QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts( else if (parts.empty()) return {}; - Names real_column_names; - Names virt_column_names; - /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it. - /// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query. - bool sample_factor_column_queried = false; - - selectColumnNames(column_names_to_return, data, real_column_names, virt_column_names, sample_factor_column_queried); - return std::make_unique( std::move(parts), std::move(alter_conversions), column_names_to_return, - real_column_names, - virt_column_names, data, query_info, storage_snapshot, context, max_block_size, num_streams, - sample_factor_column_queried, max_block_numbers_to_read, log, merge_tree_select_result_ptr, diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index d61c97ab73b..b1afd7e6668 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -213,7 +213,6 @@ public: const MergeTreeData & data, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - bool sample_factor_column_queried, LoggerPtr log); /// Check query limits: max_partitions_to_read, max_concurrent_queries. diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 8d8b0f1cc79..c19b4ddd8a2 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -108,22 +109,22 @@ MergeTreeReadTask::Readers MergeTreePrefetchedReadPool::PrefetchedReaders::get() MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , WithContext(context_) @@ -375,7 +376,7 @@ void MergeTreePrefetchedReadPool::fillPerPartStatistics() update_stat_for_column(column.name); if (reader_settings.apply_deleted_mask && read_info.data_part->hasLightweightDelete()) - update_stat_for_column(LightweightDeleteDescription::FILTER_COLUMN.name); + update_stat_for_column(RowExistsColumn::name); for (const auto & pre_columns : read_info.task_columns.pre_columns) for (const auto & column : pre_columns) diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index 378034c5eae..0c8a6716d40 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -18,12 +18,12 @@ class 
MergeTreePrefetchedReadPool : public MergeTreeReadPoolBase, private WithCo public: MergeTreePrefetchedReadPool( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); @@ -67,7 +67,7 @@ private: struct ThreadTask { - using InfoPtr = MergeTreeReadTask::InfoPtr; + using InfoPtr = MergeTreeReadTaskInfoPtr; ThreadTask(InfoPtr read_info_, MarkRanges ranges_, Priority priority_) : read_info(std::move(read_info_)), ranges(std::move(ranges_)), priority(priority_) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 50d1216cdc2..6932762f58b 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -362,7 +362,7 @@ void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns, const NumRo } } -/// The main invariant of the data in the read result is that he number of rows is +/// The main invariant of the data in the read result is that the number of rows is /// either equal to total_rows_per_granule (if filter has not been applied) or to the number of /// 1s in the filter (if filter has been applied). void MergeTreeRangeReader::ReadResult::checkInternalConsistency() const @@ -803,8 +803,7 @@ MergeTreeRangeReader::MergeTreeRangeReader( IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, const PrewhereExprStep * prewhere_info_, - bool last_reader_in_chain_, - const Names & non_const_virtual_column_names_) + bool last_reader_in_chain_) : merge_tree_reader(merge_tree_reader_) , index_granularity(&(merge_tree_reader->data_part_info_for_read->getIndexGranularity())) , prev_reader(prev_reader_) @@ -821,21 +820,6 @@ MergeTreeRangeReader::MergeTreeRangeReader( result_sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); } - for (const auto & column_name : non_const_virtual_column_names_) - { - if (result_sample_block.has(column_name)) - continue; - - non_const_virtual_column_names.push_back(column_name); - - if (column_name == "_part_offset" && !prev_reader) - { - /// _part_offset column is filled by the first reader. - read_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); - result_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)); - } - } - if (prewhere_info) { const auto & step = *prewhere_info; @@ -1001,6 +985,8 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar if (num_read_rows == 0) num_read_rows = read_result.num_rows; + merge_tree_reader->fillVirtualColumns(columns, num_read_rows); + /// fillMissingColumns() must be called after reading but before any filtering because /// some columns (e.g. arrays) might be only partially filled and thus not be valid and /// fillMissingColumns() fixes this.
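`fillVirtualColumns` is the piece that replaces the removed `non_const_virtual_column_names` bookkeeping. A minimal sketch of the assumed contract (the name and body below are illustrative, patterned on `fillBlockNumberColumns` later in this patch, not the actual implementation):

```cpp
// Any result column that was not read from disk and matches a const virtual
// column is materialized from the per-part value carried by the read task.
void fillVirtualColumnsSketch(
    Columns & res_columns,
    const NamesAndTypesList & requested_columns,
    const VirtualFields & virtual_fields, // MergeTreeReadTaskInfo::const_virtual_fields
    size_t num_rows)
{
    auto it = requested_columns.begin();
    for (size_t i = 0; i < res_columns.size(); ++i, ++it)
    {
        if (res_columns[i])
            continue; // already filled by the reader

        if (auto field = virtual_fields.find(it->name); field != virtual_fields.end())
            res_columns[i] = it->type->createColumnConst(num_rows, field->second)->convertToFullColumnIfConst();
    }
}
```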
@@ -1050,23 +1036,23 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar return read_result; { - /// Physical columns go first and then some virtual columns follow - size_t physical_columns_count = merge_tree_reader->getColumns().size(); - Columns physical_columns(read_result.columns.begin(), read_result.columns.begin() + physical_columns_count); + size_t columns_count = merge_tree_reader->getColumns().size(); + Columns columns(read_result.columns.begin(), read_result.columns.begin() + columns_count); + merge_tree_reader->fillVirtualColumns(columns, read_result.num_rows); bool should_evaluate_missing_defaults; - merge_tree_reader->fillMissingColumns(physical_columns, should_evaluate_missing_defaults, read_result.num_rows); + merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, read_result.num_rows); /// If some columns are absent in the part, then evaluate default values if (should_evaluate_missing_defaults) - merge_tree_reader->evaluateMissingDefaults({}, physical_columns); + merge_tree_reader->evaluateMissingDefaults({}, columns); /// If result is not empty, then apply on-the-fly alter conversions if any are required if (!prewhere_info || prewhere_info->perform_alter_conversions) - merge_tree_reader->performRequiredConversions(physical_columns); + merge_tree_reader->performRequiredConversions(columns); - for (size_t i = 0; i < physical_columns.size(); ++i) - read_result.columns[i] = std::move(physical_columns[i]); + for (size_t i = 0; i < columns.size(); ++i) + read_result.columns[i] = std::move(columns[i]); } size_t total_bytes = 0; @@ -1158,12 +1144,17 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t result.adjustLastGranule(); if (read_sample_block.has("_part_offset")) - fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); + { + size_t pos = read_sample_block.getPositionByName("_part_offset"); + chassert(pos < result.columns.size()); + chassert(result.columns[pos] == nullptr); + result.columns[pos] = createPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); + } return result; } -void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset) +ColumnPtr MergeTreeRangeReader::createPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset) { size_t num_rows = result.numReadRows(); @@ -1189,7 +1180,7 @@ void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 lead *pos++ = start_part_offset++; } - result.columns.emplace_back(std::move(column)); + return column; } Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, size_t & num_rows) @@ -1203,7 +1194,7 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si if (result.rows_per_granule.empty()) { - /// If zero rows were read on prev step, than there is no more rows to read. + /// If zero rows were read on prev step, there are no more rows to read. /// Last granule may have less rows than index_granularity, so finish reading manually.
stream.finish(); return columns; diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 79ed18f4d1f..688a6b0922b 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -101,8 +101,7 @@ public: IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, const PrewhereExprStep * prewhere_info_, - bool last_reader_in_chain_, - const Names & non_const_virtual_column_names); + bool last_reader_in_chain_); MergeTreeRangeReader() = default; @@ -309,7 +308,7 @@ private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); Columns continueReadingChain(const ReadResult & result, size_t & num_rows); void executePrewhereActionsAndFilterColumns(ReadResult & result) const; - void fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); + ColumnPtr createPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); IMergeTreeReader * merge_tree_reader = nullptr; const MergeTreeIndexGranularity * index_granularity = nullptr; @@ -323,7 +322,6 @@ private: bool last_reader_in_chain = false; bool is_initialized = false; - Names non_const_virtual_column_names; LoggerPtr log = getLogger("MergeTreeRangeReader"); }; diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 68d57bf7b06..e525f7f5f65 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -35,22 +35,22 @@ size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & column MergeTreeReadPool::MergeTreeReadPool( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , min_marks_for_concurrent_read(pool_settings.min_marks_for_concurrent_read) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index e45ccad912f..cb0e8a9657f 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -26,12 +26,12 @@ public: MergeTreeReadPool( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 94942c4aa0b..0cbb0a86b2f 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -8,21 +8,21 @@ namespace DB MergeTreeReadPoolBase::MergeTreeReadPoolBase( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const 
PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & pool_settings_, const ContextPtr & context_) : parts_ranges(std::move(parts_)) + , shared_virtual_fields(std::move(shared_virtual_fields_)) , storage_snapshot(storage_snapshot_) , prewhere_info(prewhere_info_) , actions_settings(actions_settings_) , reader_settings(reader_settings_) , column_names(column_names_) - , virtual_column_names(virtual_column_names_) , pool_settings(pool_settings_) , owned_mark_cache(context_->getGlobalContext()->getMarkCache()) , owned_uncompressed_cache(pool_settings_.use_uncompressed_cache ? context_->getGlobalContext()->getUncompressedCache() : nullptr) @@ -45,7 +45,7 @@ void MergeTreeReadPoolBase::fillPerPartInfos() assertSortedAndNonIntersecting(part_with_ranges.ranges); #endif - MergeTreeReadTask::Info read_task_info; + MergeTreeReadTaskInfo read_task_info; read_task_info.data_part = part_with_ranges.data_part; read_task_info.part_index_in_query = part_with_ranges.part_index_in_query; @@ -54,9 +54,16 @@ void MergeTreeReadPoolBase::fillPerPartInfos() LoadedMergeTreeDataPartInfoForReader part_info(part_with_ranges.data_part, part_with_ranges.alter_conversions); read_task_info.task_columns = getReadTaskColumns( - part_info, storage_snapshot, column_names, virtual_column_names, - prewhere_info, actions_settings, - reader_settings, /*with_subcolumns=*/ true); + part_info, + storage_snapshot, + column_names, + prewhere_info, + actions_settings, + reader_settings, + /*with_subcolumns=*/true); + + read_task_info.const_virtual_fields = shared_virtual_fields; + read_task_info.const_virtual_fields.emplace("_part_index", read_task_info.part_index_in_query); if (pool_settings.preferred_block_size_bytes > 0) { @@ -76,7 +83,7 @@ void MergeTreeReadPoolBase::fillPerPartInfos() } is_part_on_remote_disk.push_back(part_with_ranges.data_part->isStoredOnRemoteDisk()); - per_part_infos.push_back(std::make_shared(std::move(read_task_info))); + per_part_infos.push_back(std::make_shared(std::move(read_task_info))); } } @@ -98,7 +105,7 @@ std::vector MergeTreeReadPoolBase::getPerPartSumMarks() const } MergeTreeReadTaskPtr MergeTreeReadPoolBase::createTask( - MergeTreeReadTask::InfoPtr read_info, + MergeTreeReadTaskInfoPtr read_info, MarkRanges ranges, MergeTreeReadTask * previous_task) const { diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.h b/src/Storages/MergeTree/MergeTreeReadPoolBase.h index 0081063cd37..1b5bfec5898 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.h @@ -23,12 +23,12 @@ public: MergeTreeReadPoolBase( RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); @@ -37,12 +37,12 @@ public: protected: /// Initialized in constructor const RangesInDataParts parts_ranges; + const VirtualFields shared_virtual_fields; const StorageSnapshotPtr storage_snapshot; const PrewhereInfoPtr prewhere_info; const ExpressionActionsSettings actions_settings; const MergeTreeReaderSettings reader_settings; const Names column_names; - const Names virtual_column_names; const 
PoolSettings pool_settings; const MarkCachePtr owned_mark_cache; const UncompressedCachePtr owned_uncompressed_cache; @@ -52,13 +52,13 @@ protected: std::vector getPerPartSumMarks() const; MergeTreeReadTaskPtr createTask( - MergeTreeReadTask::InfoPtr read_info, + MergeTreeReadTaskInfoPtr read_info, MarkRanges ranges, MergeTreeReadTask * previous_task) const; MergeTreeReadTask::Extras getExtras() const; - std::vector per_part_infos; + std::vector per_part_infos; std::vector is_part_on_remote_disk; ReadBufferFromFileBase::ProfileCallback profile_callback; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp index 1b621ad5055..4c0391ffa57 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp @@ -12,22 +12,22 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( bool has_limit_below_one_block_, MergeTreeReadType read_type_, RangesInDataParts parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , has_limit_below_one_block(has_limit_below_one_block_) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h index d9cc1ba4984..9fedf396a6b 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h @@ -11,12 +11,12 @@ public: bool has_limit_below_one_block_, MergeTreeReadType read_type_, RangesInDataParts parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 47436ed1407..38035d97f56 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -13,22 +13,22 @@ namespace ErrorCodes MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( ParallelReadingExtension extension_, RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , extension(std::move(extension_)) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h 
b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h index 6a548dffe37..ca159edb91c 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h @@ -11,12 +11,12 @@ public: MergeTreeReadPoolParallelReplicas( ParallelReadingExtension extension_, RangesInDataParts && parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp index a822a517933..01c0a9f91be 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -12,22 +12,22 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd ParallelReadingExtension extension_, CoordinationMode mode_, RangesInDataParts parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, actions_settings_, reader_settings_, column_names_, - virtual_column_names_, settings_, context_) , extension(std::move(extension_)) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h index 3e5f8f5dfba..4fe3f7a699c 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h @@ -12,12 +12,12 @@ public: ParallelReadingExtension extension_, CoordinationMode mode_, RangesInDataParts parts_, + VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, - const Names & virtual_column_names_, const PoolSettings & settings_, const ContextPtr & context_); diff --git a/src/Storages/MergeTree/MergeTreeReadTask.cpp b/src/Storages/MergeTree/MergeTreeReadTask.cpp index 41c7531b6a6..08b30e445e2 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.cpp +++ b/src/Storages/MergeTree/MergeTreeReadTask.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace DB @@ -10,7 +11,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -String MergeTreeReadTask::Columns::dump() const +String MergeTreeReadTaskColumns::dump() const { WriteBufferFromOwnString s; for (size_t i = 0; i < pre_columns.size(); ++i) @@ -22,7 +23,7 @@ String MergeTreeReadTask::Columns::dump() const } MergeTreeReadTask::MergeTreeReadTask( - InfoPtr info_, + MergeTreeReadTaskInfoPtr info_, Readers readers_, MarkRanges mark_ranges_, MergeTreeBlockSizePredictorPtr size_predictor_) @@ -34,23 +35,30 @@ MergeTreeReadTask::MergeTreeReadTask( } MergeTreeReadTask::Readers 
MergeTreeReadTask::createReaders( - const InfoPtr & read_info, const Extras & extras, const MarkRanges & ranges) + const MergeTreeReadTaskInfoPtr & read_info, const Extras & extras, const MarkRanges & ranges) { Readers new_readers; auto create_reader = [&](const NamesAndTypesList & columns_to_read) { return read_info->data_part->getReader( - columns_to_read, extras.storage_snapshot, ranges, - extras.uncompressed_cache, extras.mark_cache, - read_info->alter_conversions, extras.reader_settings, extras.value_size_map, extras.profile_callback); + columns_to_read, + extras.storage_snapshot, + ranges, + read_info->const_virtual_fields, + extras.uncompressed_cache, + extras.mark_cache, + read_info->alter_conversions, + extras.reader_settings, + extras.value_size_map, + extras.profile_callback); }; new_readers.main = create_reader(read_info->task_columns.columns); /// Add lightweight delete filtering step if (extras.reader_settings.apply_deleted_mask && read_info->data_part->hasLightweightDelete()) - new_readers.prewhere.push_back(create_reader({LightweightDeleteDescription::FILTER_COLUMN})); + new_readers.prewhere.push_back(create_reader({{RowExistsColumn::name, RowExistsColumn::type}})); for (const auto & pre_columns_per_step : read_info->task_columns.pre_columns) new_readers.prewhere.push_back(create_reader(pre_columns_per_step)); @@ -58,10 +66,8 @@ MergeTreeReadTask::Readers MergeTreeReadTask::createReaders( return new_readers; } -MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders( - const Readers & task_readers, - const PrewhereExprInfo & prewhere_actions, - const Names & non_const_virtual_column_names) +MergeTreeReadTask::RangeReaders +MergeTreeReadTask::createRangeReaders(const Readers & task_readers, const PrewhereExprInfo & prewhere_actions) { MergeTreeReadTask::RangeReaders new_range_readers; if (prewhere_actions.steps.size() != task_readers.prewhere.size()) @@ -77,10 +83,7 @@ MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders( { last_reader = task_readers.main->getColumns().empty() && (i + 1 == prewhere_actions.steps.size()); - MergeTreeRangeReader current_reader( - task_readers.prewhere[i].get(), - prev_reader, prewhere_actions.steps[i].get(), - last_reader, non_const_virtual_column_names); + MergeTreeRangeReader current_reader(task_readers.prewhere[i].get(), prev_reader, prewhere_actions.steps[i].get(), last_reader); new_range_readers.prewhere.push_back(std::move(current_reader)); prev_reader = &new_range_readers.prewhere.back(); @@ -88,11 +91,11 @@ MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders( if (!last_reader) { - new_range_readers.main = MergeTreeRangeReader(task_readers.main.get(), prev_reader, nullptr, true, non_const_virtual_column_names); + new_range_readers.main = MergeTreeRangeReader(task_readers.main.get(), prev_reader, nullptr, true); } else { - /// If all columns are read by prewhere range readers than move last prewhere range reader to main. + /// If all columns are read by prewhere range readers, move last prewhere range reader to main. 
new_range_readers.main = std::move(new_range_readers.prewhere.back()); new_range_readers.prewhere.pop_back(); } @@ -100,14 +103,12 @@ return new_range_readers; } -void MergeTreeReadTask::initializeRangeReaders( - const PrewhereExprInfo & prewhere_actions, - const Names & non_const_virtual_column_names) +void MergeTreeReadTask::initializeRangeReaders(const PrewhereExprInfo & prewhere_actions) { if (range_readers.main.isInitialized()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Range reader is already initialized"); - range_readers = createRangeReaders(readers, prewhere_actions, non_const_virtual_column_names); + range_readers = createRangeReaders(readers, prewhere_actions); } UInt64 MergeTreeReadTask::estimateNumRows(const BlockSizeParams & params) const diff --git a/src/Storages/MergeTree/MergeTreeReadTask.h b/src/Storages/MergeTree/MergeTreeReadTask.h index 3fe79f1087c..c8bb501c0e8 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.h +++ b/src/Storages/MergeTree/MergeTreeReadTask.h @@ -20,6 +20,8 @@ using MergeTreeBlockSizePredictorPtr = std::shared_ptr; using MergeTreeReaderPtr = std::unique_ptr; +using VirtualFields = std::unordered_map; + enum class MergeTreeReadType { @@ -40,36 +42,38 @@ enum class MergeTreeReadType ParallelReplicas, }; +struct MergeTreeReadTaskColumns +{ + /// Column names to read during WHERE + NamesAndTypesList columns; + /// Column names to read during each PREWHERE step + std::vector pre_columns; + + String dump() const; +}; + +struct MergeTreeReadTaskInfo +{ + /// Data part which should be read while performing this task + DataPartPtr data_part; + /// For `part_index` virtual column + size_t part_index_in_query; + /// Alter conversions that should be applied on the fly for the part. + AlterConversionsPtr alter_conversions; + /// Column names to read during PREWHERE and WHERE + MergeTreeReadTaskColumns task_columns; + /// Shared initialized size predictor. It is copied for each new task. + MergeTreeBlockSizePredictorPtr shared_size_predictor; + /// Values of virtual columns that are const for the whole part (e.g. _part_index). + VirtualFields const_virtual_fields; +}; + +using MergeTreeReadTaskInfoPtr = std::shared_ptr; + /// A batch of work for MergeTreeSelectProcessor struct MergeTreeReadTask : private boost::noncopyable { public: - struct Columns - { - /// Column names to read during WHERE - NamesAndTypesList columns; - /// Column names to read during each PREWHERE step - std::vector pre_columns; - - String dump() const; - }; - - struct Info - { - /// Data part which should be read while performing this task - DataPartPtr data_part; - /// For virtual `part_index` virtual column - size_t part_index_in_query; - /// Alter converversionss that should be applied on-fly for part. - AlterConversionsPtr alter_conversions; - /// Column names to read during PREWHERE and WHERE - Columns task_columns; - /// Shared initialized size predictor. It is copied for each new task. - MergeTreeBlockSizePredictorPtr shared_size_predictor; - }; - - using InfoPtr = std::shared_ptr; - /// Extra params that are required for creation of reader. 
struct Extras { @@ -115,27 +119,32 @@ public: size_t num_read_bytes = 0; }; - MergeTreeReadTask(InfoPtr info_, Readers readers_, MarkRanges mark_ranges_, MergeTreeBlockSizePredictorPtr size_predictor_); + MergeTreeReadTask( + MergeTreeReadTaskInfoPtr info_, + Readers readers_, + MarkRanges mark_ranges_, + MergeTreeBlockSizePredictorPtr size_predictor_); + - void initializeRangeReaders(const PrewhereExprInfo & prewhere_actions, const Names & non_const_virtual_column_names); + void initializeRangeReaders(const PrewhereExprInfo & prewhere_actions); BlockAndProgress read(const BlockSizeParams & params); bool isFinished() const { return mark_ranges.empty() && range_readers.main.isCurrentRangeFinished(); } - const Info & getInfo() const { return *info; } + const MergeTreeReadTaskInfo & getInfo() const { return *info; } const MergeTreeRangeReader & getMainRangeReader() const { return range_readers.main; } const IMergeTreeReader & getMainReader() const { return *readers.main; } Readers releaseReaders() { return std::move(readers); } - static Readers createReaders(const InfoPtr & read_info, const Extras & extras, const MarkRanges & ranges); - static RangeReaders createRangeReaders(const Readers & readers, const PrewhereExprInfo & prewhere_actions, const Names & non_const_virtual_column_names); + static Readers createReaders(const MergeTreeReadTaskInfoPtr & read_info, const Extras & extras, const MarkRanges & ranges); + static RangeReaders createRangeReaders(const Readers & readers, const PrewhereExprInfo & prewhere_actions); private: UInt64 estimateNumRows(const BlockSizeParams & params) const; /// Shared information required for reading. - InfoPtr info; + MergeTreeReadTaskInfoPtr info; /// Readers for data_part of this task. /// May be reused and released to the next task. 
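With `Info` promoted to the standalone `MergeTreeReadTaskInfo`, the pools thread per-part constants through `VirtualFields`. Condensed from `fillPerPartInfos` earlier in this patch; the `_sample_factor` entry is only a guess at what callers put into `shared_virtual_fields`:

```cpp
// Per-part task info: shared fields first, then part-specific ones.
MergeTreeReadTaskInfo read_task_info;
read_task_info.data_part = part_with_ranges.data_part;
read_task_info.part_index_in_query = part_with_ranges.part_index_in_query;

read_task_info.const_virtual_fields = shared_virtual_fields; // e.g. {"_sample_factor", 0.25}
read_task_info.const_virtual_fields.emplace("_part_index", read_task_info.part_index_in_query);
```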
diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 02048009296..63824366722 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -17,6 +17,7 @@ namespace ErrorCodes MergeTreeReaderCompact::MergeTreeReaderCompact( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -29,6 +30,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( : IMergeTreeReader( data_part_info_for_read_, columns_, + virtual_fields_, storage_snapshot_, uncompressed_cache_, mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.h b/src/Storages/MergeTree/MergeTreeReaderCompact.h index dace4ec468e..769e6a08be4 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.h +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.h @@ -21,6 +21,7 @@ public: MergeTreeReaderCompact( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp index bacd86511f5..91fc8966a7a 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp @@ -19,12 +19,14 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, DataPartInMemoryPtr data_part_, NamesAndTypesList columns_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, MarkRanges mark_ranges_, MergeTreeReaderSettings settings_) : IMergeTreeReader( data_part_info_for_read_, columns_, + virtual_fields_, storage_snapshot_, nullptr, nullptr, diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.h b/src/Storages/MergeTree/MergeTreeReaderInMemory.h index e26a98f0916..cc1e2e9e4e2 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.h +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.h @@ -18,6 +18,7 @@ public: MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, DataPartInMemoryPtr data_part_, NamesAndTypesList columns_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, MarkRanges mark_ranges_, MergeTreeReaderSettings settings_); diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 640432ef755..d34a58a25b0 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -24,6 +24,7 @@ namespace MergeTreeReaderWide::MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_, NamesAndTypesList columns_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, @@ -35,6 +36,7 @@ MergeTreeReaderWide::MergeTreeReaderWide( : IMergeTreeReader( data_part_info_, columns_, + virtual_fields_, storage_snapshot_, uncompressed_cache_, mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index 2a850cc2814..a9a5526dd65 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ 
b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -17,6 +17,7 @@ public: MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, + const VirtualFields & virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 64cec946991..fce733d47b7 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include namespace DB @@ -20,41 +20,26 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int QUERY_WAS_CANCELLED; } -static void injectNonConstVirtualColumns( - size_t rows, - Block & block, - const Names & virtual_columns, - MergeTreeReadTask * task = nullptr); - -static void injectPartConstVirtualColumns( - size_t rows, - Block & block, - MergeTreeReadTask * task, - const DataTypePtr & partition_value_type, - const Names & virtual_columns); - MergeTreeSelectProcessor::MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, - const MergeTreeData & storage_, + const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, - const MergeTreeReaderSettings & reader_settings_, - const Names & virt_column_names_) + const MergeTreeReaderSettings & reader_settings_) : pool(std::move(pool_)) , algorithm(std::move(algorithm_)) + , storage_snapshot(storage_snapshot_) , prewhere_info(prewhere_info_) , actions_settings(actions_settings_) , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps)) , reader_settings(reader_settings_) , block_size_params(block_size_params_) - , virt_column_names(virt_column_names_) - , partition_value_type(storage_.getPartitionValueType()) + , result_header(transformHeader(pool->getHeader(), prewhere_info)) { if (reader_settings.apply_deleted_mask) { @@ -62,7 +47,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( { .type = PrewhereExprStep::Filter, .actions = nullptr, - .filter_column_name = LightweightDeleteDescription::FILTER_COLUMN.name, + .filter_column_name = RowExistsColumn::name, .remove_filter_column = true, .need_filter = true, .perform_alter_conversions = true, @@ -71,16 +56,6 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( lightweight_delete_filter_step = std::make_shared(std::move(step)); } - header_without_const_virtual_columns = SourceStepWithFilter::applyPrewhereActions(pool->getHeader(), prewhere_info); - size_t non_const_columns_offset = header_without_const_virtual_columns.columns(); - injectNonConstVirtualColumns(0, header_without_const_virtual_columns, virt_column_names); - - for (size_t col_num = non_const_columns_offset; col_num < header_without_const_virtual_columns.columns(); ++col_num) - non_const_virtual_column_names.emplace_back(header_without_const_virtual_columns.getByPosition(col_num).name); - - result_header = header_without_const_virtual_columns; - injectPartConstVirtualColumns(0, result_header, nullptr, partition_value_type, virt_column_names); - if (!prewhere_actions.steps.empty()) LOG_TRACE(log, "PREWHERE condition was split into {} steps: {}", prewhere_actions.steps.size(), 
prewhere_actions.dumpConditions()); @@ -163,8 +138,6 @@ ChunkAndProgress MergeTreeSelectProcessor::read() if (res.row_count) { - injectVirtualColumns(res.block, res.row_count, task.get(), partition_value_type, virt_column_names); - /// Reorder the columns according to result_header Columns ordered_columns; ordered_columns.reserve(result_header.columns()); @@ -198,209 +171,12 @@ void MergeTreeSelectProcessor::initializeRangeReaders() for (const auto & step : prewhere_actions.steps) all_prewhere_actions.steps.push_back(step); - task->initializeRangeReaders(all_prewhere_actions, non_const_virtual_column_names); + task->initializeRangeReaders(all_prewhere_actions); } - -namespace +Block MergeTreeSelectProcessor::transformHeader(Block block, const PrewhereInfoPtr & prewhere_info) { - struct VirtualColumnsInserter - { - explicit VirtualColumnsInserter(Block & block_) : block(block_) {} - - bool columnExists(const String & name) const { return block.has(name); } - - void insertUInt8Column(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(), name}); - } - - void insertUInt64Column(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(), name}); - } - - void insertUUIDColumn(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(), name}); - } - - void insertLowCardinalityColumn(const ColumnPtr & column, const String & name) - { - block.insert({column, std::make_shared(std::make_shared()), name}); - } - - void insertPartitionValueColumn( - size_t rows, const Row & partition_value, const DataTypePtr & partition_value_type, const String & name) - { - ColumnPtr column; - if (rows) - column = partition_value_type->createColumnConst(rows, Tuple(partition_value.begin(), partition_value.end())) - ->convertToFullColumnIfConst(); - else - column = partition_value_type->createColumn(); - - block.insert({column, partition_value_type, name}); - } - - Block & block; - }; -} - -/// Adds virtual columns that are not const for all rows -static void injectNonConstVirtualColumns( - size_t rows, - Block & block, - const Names & virtual_columns, - MergeTreeReadTask * task) -{ - VirtualColumnsInserter inserter(block); - for (const auto & virtual_column_name : virtual_columns) - { - if (virtual_column_name == "_part_offset") - { - if (!rows) - { - inserter.insertUInt64Column(DataTypeUInt64().createColumn(), virtual_column_name); - } - else - { - if (!inserter.columnExists(virtual_column_name)) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Column {} must have been filled part reader", - virtual_column_name); - } - } - - if (virtual_column_name == LightweightDeleteDescription::FILTER_COLUMN.name) - { - /// If _row_exists column isn't present in the part then fill it here with 1s - ColumnPtr column; - if (rows) - column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumnConst(rows, 1)->convertToFullColumnIfConst(); - else - column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumn(); - - inserter.insertUInt8Column(column, virtual_column_name); - } - - if (virtual_column_name == BlockNumberColumn::name) - { - ColumnPtr column; - if (rows) - { - size_t value = 0; - if (task) - { - value = task->getInfo().data_part ? 
task->getInfo().data_part->info.min_block : 0; - } - column = BlockNumberColumn::type->createColumnConst(rows, value)->convertToFullColumnIfConst(); - } - else - column = BlockNumberColumn::type->createColumn(); - - inserter.insertUInt64Column(column, virtual_column_name); - } - } -} - -/// Adds virtual columns that are const for the whole part -static void injectPartConstVirtualColumns( - size_t rows, - Block & block, - MergeTreeReadTask * task, - const DataTypePtr & partition_value_type, - const Names & virtual_columns) -{ - VirtualColumnsInserter inserter(block); - /// add virtual columns - /// Except _sample_factor, which is added from the outside. - if (!virtual_columns.empty()) - { - if (unlikely(rows && !task)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insert virtual columns to non-empty chunk without specified task."); - - const IMergeTreeDataPart * part = nullptr; - - if (rows) - { - part = task->getInfo().data_part.get(); - if (part->isProjectionPart()) - part = part->getParentPart(); - } - - for (const auto & virtual_column_name : virtual_columns) - { - if (virtual_column_name == "_part") - { - ColumnPtr column; - if (rows) - column = DataTypeLowCardinality{std::make_shared()} - .createColumnConst(rows, part->name) - ->convertToFullColumnIfConst(); - else - column = DataTypeLowCardinality{std::make_shared()}.createColumn(); - - inserter.insertLowCardinalityColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_part_index") - { - ColumnPtr column; - if (rows) - column = DataTypeUInt64().createColumnConst(rows, task->getInfo().part_index_in_query)->convertToFullColumnIfConst(); - else - column = DataTypeUInt64().createColumn(); - - inserter.insertUInt64Column(column, virtual_column_name); - } - else if (virtual_column_name == "_part_uuid") - { - ColumnPtr column; - if (rows) - column = DataTypeUUID().createColumnConst(rows, part->uuid)->convertToFullColumnIfConst(); - else - column = DataTypeUUID().createColumn(); - - inserter.insertUUIDColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_partition_id") - { - ColumnPtr column; - if (rows) - column = DataTypeLowCardinality{std::make_shared()} - .createColumnConst(rows, part->info.partition_id) - ->convertToFullColumnIfConst(); - else - column = DataTypeLowCardinality{std::make_shared()}.createColumn(); - - inserter.insertLowCardinalityColumn(column, virtual_column_name); - } - else if (virtual_column_name == "_partition_value") - { - if (rows) - inserter.insertPartitionValueColumn(rows, part->partition.value, partition_value_type, virtual_column_name); - else - inserter.insertPartitionValueColumn(rows, {}, partition_value_type, virtual_column_name); - } - } - } -} - -void MergeTreeSelectProcessor::injectVirtualColumns( - Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns) -{ - /// First add non-const columns that are filled by the range reader and then const columns that we will fill ourselves. 
- /// Note that the order is important: virtual columns filled by the range reader must go first - injectNonConstVirtualColumns(row_count, block, virtual_columns,task); - injectPartConstVirtualColumns(row_count, block, task, partition_value_type, virtual_columns); -} - -Block MergeTreeSelectProcessor::transformHeader( - Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns) -{ - injectVirtualColumns(block, 0, nullptr, partition_value_type, virtual_columns); - auto transformed = SourceStepWithFilter::applyPrewhereActions(std::move(block), prewhere_info); - return transformed; + return SourceStepWithFilter::applyPrewhereActions(std::move(block), prewhere_info); } } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index b1606f983a1..01bb3851e04 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -41,21 +41,15 @@ public: MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, - const MergeTreeData & storage_, + const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, - const MergeTreeReaderSettings & reader_settings_, - const Names & virt_column_names_); + const MergeTreeReaderSettings & reader_settings_); String getName() const; - static Block transformHeader( - Block block, - const PrewhereInfoPtr & prewhere_info, - const DataTypePtr & partition_value_type, - const Names & virtual_columns); - + static Block transformHeader(Block block, const PrewhereInfoPtr & prewhere_info); Block getHeader() const { return result_header; } ChunkAndProgress read(); @@ -81,14 +75,12 @@ private: size_t num_read_bytes = 0; }; - /// Used for filling header with no rows as well as block with data - static void injectVirtualColumns(Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns); - /// Sets up range readers corresponding to data readers void initializeRangeReaders(); const MergeTreeReadPoolPtr pool; const MergeTreeSelectAlgorithmPtr algorithm; + const StorageSnapshotPtr storage_snapshot; const PrewhereInfoPtr prewhere_info; const ExpressionActionsSettings actions_settings; @@ -96,17 +88,11 @@ private: const MergeTreeReaderSettings reader_settings; const MergeTreeReadTask::BlockSizeParams block_size_params; - const Names virt_column_names; - const DataTypePtr partition_value_type; /// Current task to read from. MergeTreeReadTaskPtr task; /// This step is added when the part has a lightweight delete mask PrewhereExprStepPtr lightweight_delete_filter_step; - /// These columns will be filled by the merge tree range reader - Names non_const_virtual_column_names; - /// This header is used for chunks from readFromPart(). - Block header_without_const_virtual_columns; /// A result of getHeader(). A chunk with this header is returned from read(). 
Block result_header; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index d0fbc316024..e5545a92aea 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,7 @@ namespace DB { + namespace ErrorCodes { extern const int MEMORY_LIMIT_EXCEEDED; @@ -55,7 +57,6 @@ protected: Chunk generate() override; private: - const MergeTreeData & storage; StorageSnapshotPtr storage_snapshot; @@ -86,7 +87,6 @@ private: void finish(); }; - MergeTreeSequentialSource::MergeTreeSequentialSource( MergeTreeSequentialSourceType type, const MergeTreeData & storage_, @@ -136,10 +136,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( { auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical) .withExtendedObjects() - .withSystemColumns(); - - if (storage.supportsSubcolumns()) - options.withSubcolumns(); + .withVirtuals() + .withSubcolumns(storage.supportsSubcolumns()); columns_for_reader = storage_snapshot->getColumnsByNames(options, columns_to_read); } @@ -181,9 +179,37 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( mark_ranges.emplace(MarkRanges{MarkRange(0, data_part->getMarksCount())}); reader = data_part->getReader( - columns_for_reader, storage_snapshot, - *mark_ranges, /* uncompressed_cache = */ nullptr, - mark_cache.get(), alter_conversions, reader_settings, {}, {}); + columns_for_reader, + storage_snapshot, + *mark_ranges, + /*virtual_fields=*/ {}, + /*uncompressed_cache=*/{}, + mark_cache.get(), + alter_conversions, + reader_settings, + {}, + {}); +} + +static void fillBlockNumberColumns( + Columns & res_columns, + const NamesAndTypesList & columns_list, + UInt64 block_number, + UInt64 num_rows) +{ + chassert(res_columns.size() == columns_list.size()); + + auto it = columns_list.begin(); + for (size_t i = 0; i < res_columns.size(); ++i, ++it) + { + if (res_columns[i]) + continue; + + if (it->name == BlockNumberColumn::name) + { + res_columns[i] = BlockNumberColumn::type->createColumnConst(num_rows, block_number)->convertToFullColumnIfConst(); + } + } } Chunk MergeTreeSequentialSource::generate() @@ -204,16 +230,17 @@ try if (rows_read) { + fillBlockNumberColumns(columns, sample, data_part->info.min_block, rows_read); + reader->fillVirtualColumns(columns, rows_read); + current_row += rows_read; current_mark += (rows_to_read == rows_read); bool should_evaluate_missing_defaults = false; - reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_read, data_part->info.min_block); + reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_read); if (should_evaluate_missing_defaults) - { reader->evaluateMissingDefaults({}, columns); - } reader->performRequiredConversions(columns); @@ -278,14 +305,13 @@ Pipe createMergeTreeSequentialSource( bool quiet, std::shared_ptr> filtered_rows_count) { - const auto & filter_column = LightweightDeleteDescription::FILTER_COLUMN; /// The part might have some rows masked by lightweight deletes const bool need_to_filter_deleted_rows = apply_deleted_mask && data_part->hasLightweightDelete(); - const bool has_filter_column = std::ranges::find(columns_to_read, filter_column.name) != columns_to_read.end(); + const bool has_filter_column = std::ranges::find(columns_to_read, RowExistsColumn::name) != columns_to_read.end(); if (need_to_filter_deleted_rows && !has_filter_column) - 
columns_to_read.emplace_back(filter_column.name); + columns_to_read.emplace_back(RowExistsColumn::name); auto column_part_source = std::make_shared(type, storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges), @@ -299,7 +325,7 @@ Pipe createMergeTreeSequentialSource( pipe.addSimpleTransform([filtered_rows_count, has_filter_column](const Block & header) { return std::make_shared( - header, nullptr, filter_column.name, !has_filter_column, false, filtered_rows_count); + header, nullptr, RowExistsColumn::name, !has_filter_column, false, filtered_rows_count); }); } diff --git a/src/Storages/MergeTree/MergeTreeVirtualColumns.cpp b/src/Storages/MergeTree/MergeTreeVirtualColumns.cpp new file mode 100644 index 00000000000..b87dccc2b18 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeVirtualColumns.cpp @@ -0,0 +1,52 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NO_SUCH_COLUMN_IN_TABLE; +} + +static ASTPtr getCompressionCodecDeltaLZ4() +{ + return makeASTFunction("CODEC", + std::make_shared("Delta"), + std::make_shared("LZ4")); +} + +const String RowExistsColumn::name = "_row_exists"; +const DataTypePtr RowExistsColumn::type = std::make_shared(); + +const String BlockNumberColumn::name = "_block_number"; +const DataTypePtr BlockNumberColumn::type = std::make_shared(); +const ASTPtr BlockNumberColumn::codec = getCompressionCodecDeltaLZ4(); + +Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part) +{ + if (column_name == RowExistsColumn::name) + return 1ULL; + + if (column_name == BlockNumberColumn::name) + return part.info.min_block; + + if (column_name == "_part") + return part.name; + + if (column_name == "_part_uuid") + return part.uuid; + + if (column_name == "_partition_id") + return part.info.partition_id; + + if (column_name == "_partition_value") + return Tuple(part.partition.value.begin(), part.partition.value.end()); + + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Unexpected const virtual column: {}", column_name); +} + +} diff --git a/src/Storages/MergeTree/MergeTreeVirtualColumns.h b/src/Storages/MergeTree/MergeTreeVirtualColumns.h new file mode 100644 index 00000000000..24721bf1ad1 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeVirtualColumns.h @@ -0,0 +1,26 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class IMergeTreeDataPart; + +struct RowExistsColumn +{ + static const String name; + static const DataTypePtr type; +}; + +struct BlockNumberColumn +{ + static const String name; + static const DataTypePtr type; + static const ASTPtr codec; +}; + +Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part); + +} diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 1c33f018a5d..a5b8a2a2a6d 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -168,7 +168,7 @@ static void splitAndModifyMutationCommands( { if (!mutated_columns.contains(column.name)) { - if (!metadata_snapshot->getColumns().has(column.name) && !part->storage.getVirtuals().contains(column.name)) + if (!metadata_snapshot->getColumns().has(column.name) && !part->storage.getVirtualsPtr()->has(column.name)) { /// We cannot add the column because there's no such column in table. /// It's okay if the column was dropped. 
It may also be absent in dropped_columns @@ -283,7 +283,6 @@ getColumnsForNewDataPart( ColumnsDescription part_columns(source_part->getColumns()); NamesAndTypesList system_columns; - const auto & deleted_mask_column = LightweightDeleteDescription::FILTER_COLUMN; bool supports_lightweight_deletes = source_part->supportLightweightDeleteMutate(); bool deleted_mask_updated = false; @@ -299,9 +298,9 @@ getColumnsForNewDataPart( { for (const auto & [column_name, _] : command.column_to_update_expression) { - if (column_name == deleted_mask_column.name + if (column_name == RowExistsColumn::name && supports_lightweight_deletes - && !storage_columns_set.contains(deleted_mask_column.name)) + && !storage_columns_set.contains(RowExistsColumn::name)) deleted_mask_updated = true; } } @@ -323,12 +322,12 @@ getColumnsForNewDataPart( } } - if (!storage_columns_set.contains(deleted_mask_column.name)) + if (!storage_columns_set.contains(RowExistsColumn::name)) { - if (deleted_mask_updated || (part_columns.has(deleted_mask_column.name) && !has_delete_command)) + if (deleted_mask_updated || (part_columns.has(RowExistsColumn::name) && !has_delete_command)) { - storage_columns.push_back(deleted_mask_column); - storage_columns_set.insert(deleted_mask_column.name); + storage_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type); + storage_columns_set.insert(RowExistsColumn::name); } } diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index bbb38346f38..ca8ed9abdb5 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -34,6 +34,7 @@ public: , partition_id(part_->info.partition_id) { setInMemoryMetadata(storage.getInMemoryMetadata()); + setVirtuals(*storage.getVirtualsPtr()); } /// Used in queries with projection. 
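The new `MergeTreeVirtualColumns` helpers centralize what the deleted `injectPartConstVirtualColumns` did inline. A usage sketch (the wrapper below is hypothetical; types restored where the patch text lost template arguments: `_row_exists` is UInt8, `_block_number` is UInt64):

```cpp
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>

// Materialize one const virtual column for `rows` rows of a given part.
ColumnPtr makeConstVirtualColumn(
    const IMergeTreeDataPart & part, const String & name, const DataTypePtr & type, size_t rows)
{
    // Throws NO_SUCH_COLUMN_IN_TABLE for names it does not know about
    // (see getFieldForConstVirtualColumn in MergeTreeVirtualColumns.cpp above).
    Field value = getFieldForConstVirtualColumn(name, part);
    return type->createColumnConst(rows, value)->convertToFullColumnIfConst();
}
```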
@@ -90,11 +91,6 @@ public: bool supportsSubcolumns() const override { return true; } - NamesAndTypesList getVirtuals() const override - { - return storage.getVirtuals(); - } - String getPartitionId() const { return partition_id; diff --git a/src/Storages/NATS/NATSSource.cpp b/src/Storages/NATS/NATSSource.cpp index 3fc01eacb22..54f479faacc 100644 --- a/src/Storages/NATS/NATSSource.cpp +++ b/src/Storages/NATS/NATSSource.cpp @@ -9,10 +9,10 @@ namespace DB { -static std::pair getHeaders(StorageNATS & storage, const StorageSnapshotPtr & storage_snapshot) +static std::pair getHeaders(const StorageSnapshotPtr & storage_snapshot) { auto non_virtual_header = storage_snapshot->metadata->getSampleBlockNonMaterialized(); - auto virtual_header = storage_snapshot->getSampleBlockForColumns(storage.getVirtuals().getNames()); + auto virtual_header = storage_snapshot->virtual_columns->getSampleBlock(); return {non_virtual_header, virtual_header}; } @@ -33,7 +33,7 @@ NATSSource::NATSSource( const Names & columns, size_t max_block_size_, StreamingHandleErrorMode handle_error_mode_) - : NATSSource(storage_, storage_snapshot_, getHeaders(storage_, storage_snapshot_), context_, columns, max_block_size_, handle_error_mode_) + : NATSSource(storage_, storage_snapshot_, getHeaders(storage_snapshot_), context_, columns, max_block_size_, handle_error_mode_) { } diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index c7a5d0b8d0a..0b88a9e8929 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -88,6 +88,7 @@ StorageNATS::StorageNATS( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals(nats_settings->nats_handle_error_mode)); nats_context = addSettings(getContext()); nats_context->makeQueryContext(); @@ -131,6 +132,19 @@ StorageNATS::StorageNATS( connection_task->deactivate(); } +VirtualColumnsDescription StorageNATS::createVirtuals(StreamingHandleErrorMode handle_error_mode) +{ + VirtualColumnsDescription desc; + desc.addEphemeral("_subject", std::make_shared(), ""); + + if (handle_error_mode == StreamingHandleErrorMode::STREAM) + { + desc.addEphemeral("_raw_message", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_error", std::make_shared(std::make_shared()), ""); + } + + return desc; +} Names StorageNATS::parseList(const String & list, char delim) { @@ -746,20 +760,4 @@ void registerStorageNATS(StorageFactory & factory) factory.registerStorage("NATS", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); } - -NamesAndTypesList StorageNATS::getVirtuals() const -{ - auto virtuals = NamesAndTypesList{ - {"_subject", std::make_shared()} - }; - - if (nats_settings->nats_handle_error_mode == StreamingHandleErrorMode::STREAM) - { - virtuals.push_back({"_raw_message", std::make_shared(std::make_shared())}); - virtuals.push_back({"_error", std::make_shared(std::make_shared())}); - } - - return virtuals; -} - } diff --git a/src/Storages/NATS/StorageNATS.h b/src/Storages/NATS/StorageNATS.h index 94f955ccdae..41d77acfde6 100644 --- a/src/Storages/NATS/StorageNATS.h +++ b/src/Storages/NATS/StorageNATS.h @@ -61,7 +61,6 @@ public: NATSConsumerPtr popConsumer(std::chrono::milliseconds timeout); const String & getFormatName() const { return format_name; } - NamesAndTypesList getVirtuals() const override; void incrementReader(); void decrementReader(); @@ -137,6 +136,7 @@ private: static Names parseList(const String & list, char 
delim); static String getTableBasedName(String name, const StorageID & table_id); + static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode); ContextMutablePtr addSettings(ContextPtr context) const; size_t getMaxBlockSize() const; diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 61d83750c31..64d329f74b2 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -72,6 +72,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage MaterializedPostgreSQL is allowed only for Atomic database"); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); replication_settings->materialized_postgresql_tables_list = remote_table_name_; @@ -127,8 +128,16 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( , nested_table_id(nested_storage_->getStorageID()) { setInMemoryMetadata(nested_storage_->getInMemoryMetadata()); + setVirtuals(*nested_storage_->getVirtualsPtr()); } +VirtualColumnsDescription StorageMaterializedPostgreSQL::createVirtuals() +{ + VirtualColumnsDescription desc; + desc.addEphemeral("_sign", std::make_shared(), ""); + desc.addEphemeral("_version", std::make_shared(), ""); + return desc; +} /// A temporary clone table might be created for current table in order to update its schema and reload /// all data in the background while current table will still handle read requests. @@ -254,15 +263,6 @@ void StorageMaterializedPostgreSQL::dropInnerTableIfAny(bool sync, ContextPtr lo } -NamesAndTypesList StorageMaterializedPostgreSQL::getVirtuals() const -{ - return NamesAndTypesList{ - {"_sign", std::make_shared()}, - {"_version", std::make_shared()} - }; -} - - bool StorageMaterializedPostgreSQL::needRewriteQueryWithFinal(const Names & column_names) const { return needRewriteQueryWithFinalForStorage(column_names, getNested()); diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index 2d5c1efae7c..af2f13bb880 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -89,8 +89,6 @@ public: /// Used only for single MaterializedPostgreSQL storage. 
void dropInnerTableIfAny(bool sync, ContextPtr local_context) override; - NamesAndTypesList getVirtuals() const override; - bool needRewriteQueryWithFinal(const Names & column_names) const override; void read( @@ -138,6 +136,8 @@ private: static std::shared_ptr getMaterializedColumnsDeclaration( String name, String type, UInt64 default_value); + static VirtualColumnsDescription createVirtuals(); + ASTPtr getColumnDeclaration(const DataTypePtr & data_type) const; String getNestedTableName() const; diff --git a/src/Storages/RabbitMQ/RabbitMQSource.cpp b/src/Storages/RabbitMQ/RabbitMQSource.cpp index 72196e7dd3c..4dc257074f3 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSource.cpp @@ -11,10 +11,10 @@ namespace DB { -static std::pair getHeaders(StorageRabbitMQ & storage_, const StorageSnapshotPtr & storage_snapshot) +static std::pair getHeaders(const StorageSnapshotPtr & storage_snapshot) { auto non_virtual_header = storage_snapshot->metadata->getSampleBlockNonMaterialized(); - auto virtual_header = storage_snapshot->getSampleBlockForColumns(storage_.getVirtuals().getNames()); + auto virtual_header = storage_snapshot->virtual_columns->getSampleBlock(); return {non_virtual_header, virtual_header}; } @@ -40,7 +40,7 @@ RabbitMQSource::RabbitMQSource( : RabbitMQSource( storage_, storage_snapshot_, - getHeaders(storage_, storage_snapshot_), + getHeaders(storage_snapshot_), context_, columns, max_block_size_, diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index e2ef23193c5..980fccd307e 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -136,6 +136,7 @@ StorageRabbitMQ::StorageRabbitMQ( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals(rabbitmq_settings->rabbitmq_handle_error_mode)); rabbitmq_context = addSettings(getContext()); rabbitmq_context->makeQueryContext(); @@ -191,6 +192,26 @@ StorageRabbitMQ::StorageRabbitMQ( init_task->deactivate(); } +VirtualColumnsDescription StorageRabbitMQ::createVirtuals(StreamingHandleErrorMode handle_error_mode) +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_exchange_name", std::make_shared(), ""); + desc.addEphemeral("_channel_id", std::make_shared(), ""); + desc.addEphemeral("_delivery_tag", std::make_shared(), ""); + desc.addEphemeral("_redelivered", std::make_shared(), ""); + desc.addEphemeral("_message_id", std::make_shared(), ""); + desc.addEphemeral("_timestamp", std::make_shared(), ""); + + + if (handle_error_mode == StreamingHandleErrorMode::STREAM) + { + desc.addEphemeral("_raw_message", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_error", std::make_shared(std::make_shared()), ""); + } + + return desc; +} Names StorageRabbitMQ::parseSettings(String settings_list) { @@ -1213,25 +1234,4 @@ void registerStorageRabbitMQ(StorageFactory & factory) factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); } - -NamesAndTypesList StorageRabbitMQ::getVirtuals() const -{ - auto virtuals = NamesAndTypesList{ - {"_exchange_name", std::make_shared()}, - {"_channel_id", std::make_shared()}, - {"_delivery_tag", std::make_shared()}, - {"_redelivered", std::make_shared()}, - {"_message_id", std::make_shared()}, - {"_timestamp", std::make_shared()} - }; - - if (rabbitmq_settings->rabbitmq_handle_error_mode == StreamingHandleErrorMode::STREAM) 
- { - virtuals.push_back({"_raw_message", std::make_shared(std::make_shared())}); - virtuals.push_back({"_error", std::make_shared(std::make_shared())}); - } - - return virtuals; -} - } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index b3a0b53cde5..e14741d9636 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -68,7 +68,6 @@ public: RabbitMQConsumerPtr popConsumer(std::chrono::milliseconds timeout); const String & getFormatName() const { return format_name; } - NamesAndTypesList getVirtuals() const override; String getExchange() const { return exchange_name; } void unbindExchange(); @@ -191,6 +190,8 @@ private: bool tryStreamToViews(); bool hasDependencies(const StorageID & table_id); + static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode); + static String getRandomName() { std::uniform_int_distribution distribution('a', 'z'); diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 88504975490..6e7ac2b47b8 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -155,8 +155,7 @@ StorageS3Queue::StorageS3Queue( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); task = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); }); @@ -315,7 +314,7 @@ void StorageS3Queue::read( } auto this_ptr = std::static_pointer_cast(shared_from_this()); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); auto reading = std::make_unique( column_names, @@ -493,7 +492,7 @@ bool StorageS3Queue::streamToViews() auto block_io = interpreter.execute(); auto file_iterator = createFileIterator(s3queue_context, nullptr); - auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context)); Pipes pipes; pipes.reserve(s3queue_settings->s3queue_processing_threads_num); @@ -602,8 +601,9 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) { auto glob_iterator = std::make_unique( - *configuration.client, configuration.url, predicate, virtual_columns, local_context, + *configuration.client, configuration.url, predicate, getVirtualsList(), local_context, /* read_keys */nullptr, configuration.request_settings); + return std::make_shared(files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called); } diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index fd3b4bb4914..bdd3ab7b687 100644 --- 
a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -51,8 +51,6 @@ public: size_t max_block_size, size_t num_streams) override; - NamesAndTypesList getVirtuals() const override { return virtual_columns; } - const auto & getFormatName() const { return configuration.format; } const fs::path & getZooKeeperPath() const { return zk_path; } @@ -71,7 +69,6 @@ private: Configuration configuration; const std::optional format_settings; - NamesAndTypesList virtual_columns; BackgroundSchedulePool::TaskHolder task; std::atomic stream_cancelled{false}; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 1f0fba99f84..bac9aa1cbdf 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -491,12 +491,11 @@ StorageAzureBlob::StorageAzureBlob( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); StoredObjects objects; for (const auto & key : configuration.blobs_paths) objects.emplace_back(key); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); } void StorageAzureBlob::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) @@ -736,7 +735,7 @@ void StorageAzureBlob::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; @@ -772,13 +771,13 @@ void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate) /// Iterate through disclosed globs and make a source for each file iterator_wrapper = std::make_shared( storage->object_storage.get(), configuration.container, configuration.blob_path, - predicate, storage->virtual_columns, context, nullptr, context->getFileProgressCallback()); + predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); } else { iterator_wrapper = std::make_shared( storage->object_storage.get(), configuration.container, configuration.blobs_paths, - predicate, storage->virtual_columns, context, nullptr, context->getFileProgressCallback()); + predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); } } @@ -886,16 +885,6 @@ SinkToStoragePtr StorageAzureBlob::write(const ASTPtr & query, const StorageMeta } } -NamesAndTypesList StorageAzureBlob::getVirtuals() const -{ - return virtual_columns; -} - -Names StorageAzureBlob::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - bool StorageAzureBlob::supportsPartitionBy() const { return true; diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index e1d1c3abd33..63fd489dcaf 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -94,9 +94,6 @@ public: void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; - 
NamesAndTypesList getVirtuals() const override; - static Names getVirtualColumnNames(); - bool supportsPartitionBy() const override; bool supportsSubcolumns() const override { return true; } @@ -136,7 +133,6 @@ private: std::string name; Configuration configuration; std::unique_ptr object_storage; - NamesAndTypesList virtual_columns; const bool distributed_processing; std::optional format_settings; diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp index 32445556611..a80d121567a 100644 --- a/src/Storages/StorageAzureBlobCluster.cpp +++ b/src/Storages/StorageAzureBlobCluster.cpp @@ -63,8 +63,7 @@ StorageAzureBlobCluster::StorageAzureBlobCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageAzureBlobCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) @@ -81,17 +80,12 @@ RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension { auto iterator = std::make_shared( object_storage.get(), configuration.container, configuration.blob_path, - predicate, virtual_columns, context, nullptr); + predicate, getVirtualsList(), context, nullptr); + auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next().relative_path; }); return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; } -NamesAndTypesList StorageAzureBlobCluster::getVirtuals() const -{ - return virtual_columns; -} - - } #endif diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h index 476f21c6742..545e568a772 100644 --- a/src/Storages/StorageAzureBlobCluster.h +++ b/src/Storages/StorageAzureBlobCluster.h @@ -31,8 +31,6 @@ public: std::string getName() const override { return "AzureBlobStorageCluster"; } - NamesAndTypesList getVirtuals() const override; - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; bool supportsSubcolumns() const override { return true; } @@ -45,7 +43,6 @@ private: void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; StorageAzureBlob::Configuration configuration; - NamesAndTypesList virtual_columns; std::unique_ptr object_storage; }; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 7370bd3ab8f..4e3d8d38b0e 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -104,11 +105,8 @@ #include #include -#include - #include #include -#include #include @@ -290,22 +288,17 @@ size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & clus StorageDistributed::~StorageDistributed() = default; -NamesAndTypesList StorageDistributed::getVirtuals() const +VirtualColumnsDescription StorageDistributed::createVirtuals() { - /// NOTE This is weird. Most of these virtual columns are part of MergeTree + /// NOTE: This is weird. + /// Most of these virtual columns are part of MergeTree /// tables info. But Distributed is general-purpose engine. 
- return NamesAndTypesList{ - NameAndTypePair("_table", std::make_shared(std::make_shared())), - NameAndTypePair("_part", std::make_shared(std::make_shared())), - NameAndTypePair("_part_index", std::make_shared()), - NameAndTypePair("_part_uuid", std::make_shared()), - NameAndTypePair("_partition_id", std::make_shared(std::make_shared())), - NameAndTypePair("_sample_factor", std::make_shared()), - NameAndTypePair("_part_offset", std::make_shared()), - NameAndTypePair("_row_exists", std::make_shared()), - NameAndTypePair(BlockNumberColumn::name, BlockNumberColumn::type), - NameAndTypePair("_shard_num", std::make_shared()), /// deprecated - }; + StorageInMemoryMetadata metadata; + auto desc = MergeTreeData::createVirtuals(metadata); + + desc.addEphemeral("_shard_num", std::make_shared(), "Deprecated. Use function shardNum instead"); + + return desc; } StorageDistributed::StorageDistributed( @@ -354,6 +347,7 @@ StorageDistributed::StorageDistributed( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); if (sharding_key_) { diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index c00dd8cea04..cf5b78305a5 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -146,8 +146,6 @@ public: ActionLock getActionLock(StorageActionBlockType type) override; - NamesAndTypesList getVirtuals() const override; - /// Used by InterpreterInsertQuery std::string getRemoteDatabaseName() const { return remote_database; } std::string getRemoteTableName() const { return remote_table; } @@ -234,6 +232,8 @@ private: std::optional distributedWriteFromClusterStorage(const IStorageCluster & src_storage_cluster, const ASTInsertQuery & query, ContextPtr context) const; std::optional distributedWriteBetweenDistributedTables(const StorageDistributed & src_distributed, const ASTInsertQuery & query, ContextPtr context) const; + static VirtualColumnsDescription createVirtuals(); + String remote_database; String remote_table; ASTPtr remote_table_function_ptr; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7ed99446122..0d220f2fd5d 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1097,8 +1097,7 @@ void StorageFile::setStorageMetadata(CommonArguments args) storage_metadata.setConstraints(args.constraints); storage_metadata.setComment(args.comment); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } @@ -1581,7 +1580,7 @@ void StorageFile::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && context->getSettingsRef().optimize_count_from_files; @@ -1609,7 +1608,7 @@ void ReadFromFile::createIterator(const ActionsDAG::Node * predicate) storage->paths, storage->archive_info, predicate, - storage->virtual_columns, + storage->getVirtualsList(), context, storage->distributed_processing); } @@ -2256,9 +2255,4 
@@ StorageFile::ArchiveInfo StorageFile::getArchiveInfo( return archive_info; } -Names StorageFile::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - } diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 1db33b72d77..93c263008a6 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -80,10 +80,6 @@ public: bool storesDataOnDisk() const override; Strings getDataPaths() const override; - NamesAndTypesList getVirtuals() const override { return virtual_columns; } - - static Names getVirtualColumnNames(); - static Strings getPathsList(const String & table_path, const String & user_files_path, const ContextPtr & context, size_t & total_bytes_to_read); /// Check if the format supports reading only some subset of columns. @@ -197,8 +193,6 @@ private: std::atomic readers_counter = 0; FileRenamer file_renamer; bool was_renamed = false; - - NamesAndTypesList virtual_columns; bool distributed_processing = false; }; diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp index 0cc18abef5f..d43e242f70c 100644 --- a/src/Storages/StorageFileCluster.cpp +++ b/src/Storages/StorageFileCluster.cpp @@ -61,8 +61,7 @@ StorageFileCluster::StorageFileCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) @@ -77,7 +76,7 @@ void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const Sto RemoteQueryExecutor::Extension StorageFileCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { - auto iterator = std::make_shared(paths, std::nullopt, predicate, virtual_columns, context); + auto iterator = std::make_shared(paths, std::nullopt, predicate, getVirtualsList(), context); auto callback = std::make_shared([iter = std::move(iterator)]() mutable -> String { return iter->next(); }); return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; } diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h index 2803c8b6e5b..3acbc71ba7e 100644 --- a/src/Storages/StorageFileCluster.h +++ b/src/Storages/StorageFileCluster.h @@ -28,8 +28,6 @@ public: std::string getName() const override { return "FileCluster"; } - NamesAndTypesList getVirtuals() const override { return virtual_columns; } - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; bool supportsSubcolumns() const override { return true; } @@ -42,7 +40,6 @@ private: Strings paths; String filename; String format_name; - NamesAndTypesList virtual_columns; }; } diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 40a4190a413..b0b7afdfe8d 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -331,6 +331,10 @@ StorageKeeperMap::StorageKeeperMap( setInMemoryMetadata(metadata); + VirtualColumnsDescription virtuals; + virtuals.addEphemeral(String(version_column_name), std::make_shared(), ""); + setVirtuals(std::move(virtuals)); + WriteBufferFromOwnString out; out << 
"KeeperMap metadata format version: 1\n" << "columns: " << metadata.columns.toString() @@ -634,12 +638,6 @@ void StorageKeeperMap::drop() dropTable(client, metadata_drop_lock); } -NamesAndTypesList StorageKeeperMap::getVirtuals() const -{ - return NamesAndTypesList{ - {std::string{version_column_name}, std::make_shared()}}; -} - namespace { diff --git a/src/Storages/StorageKeeperMap.h b/src/Storages/StorageKeeperMap.h index d65548ed428..d4556792c48 100644 --- a/src/Storages/StorageKeeperMap.h +++ b/src/Storages/StorageKeeperMap.h @@ -50,8 +50,6 @@ public: void truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) override; void drop() override; - NamesAndTypesList getVirtuals() const override; - std::string getName() const override { return "KeeperMap"; } Names getPrimaryKey() const override { return {primary_key}; } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 7459760b0f5..549cfca1b6c 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -35,7 +35,6 @@ #include #include #include -#include #include #include @@ -48,8 +47,6 @@ namespace DB { - CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size); - namespace ErrorCodes { extern const int TIMEOUT_EXCEEDED; @@ -299,6 +296,7 @@ public: : SinkToStorage(metadata_snapshot_->getSampleBlock()) , storage(storage_) , metadata_snapshot(metadata_snapshot_) + , storage_snapshot(std::make_shared(storage, metadata_snapshot)) , lock(std::move(lock_)) { if (!lock) @@ -343,6 +341,7 @@ public: private: StorageLog & storage; StorageMetadataPtr metadata_snapshot; + StorageSnapshotPtr storage_snapshot; WriteLock lock; bool done = false; @@ -476,13 +475,7 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c throw Exception(ErrorCodes::LOGICAL_ERROR, "No information about file {} in StorageLog", data_file_name); const auto & data_file = *data_file_it->second; - const auto & columns = metadata_snapshot->getColumns(); - - CompressionCodecPtr compression; - if (name_and_type.name == BlockNumberColumn::name) - compression = BlockNumberColumn::compression_codec; - else - compression = columns.getCodecOrDefault(name_and_type.name); + auto compression = storage_snapshot->getCodecOrDefault(name_and_type.name); it = streams.try_emplace(data_file.name, storage.disk, data_file.path, storage.file_checker.getFileSize(data_file.path), diff --git a/src/Storages/StorageMaterializedMySQL.cpp b/src/Storages/StorageMaterializedMySQL.cpp index 0dc0b1bff0b..887c58ff816 100644 --- a/src/Storages/StorageMaterializedMySQL.cpp +++ b/src/Storages/StorageMaterializedMySQL.cpp @@ -22,9 +22,8 @@ namespace DB StorageMaterializedMySQL::StorageMaterializedMySQL(const StoragePtr & nested_storage_, const IDatabase * database_) : StorageProxy(nested_storage_->getStorageID()), nested_storage(nested_storage_), database(database_) { - StorageInMemoryMetadata in_memory_metadata; - in_memory_metadata = nested_storage->getInMemoryMetadata(); - setInMemoryMetadata(in_memory_metadata); + setInMemoryMetadata(nested_storage->getInMemoryMetadata()); + setVirtuals(*nested_storage->getVirtualsPtr()); } bool StorageMaterializedMySQL::needRewriteQueryWithFinal(const Names & column_names) const @@ -49,14 +48,6 @@ void StorageMaterializedMySQL::read( query_info, context, processed_stage, max_block_size, num_streams); } -NamesAndTypesList StorageMaterializedMySQL::getVirtuals() const -{ - if (const auto * db = typeid_cast(database)) - db->rethrowExceptionIfNeeded(); - - return 
nested_storage->getVirtuals(); -} - IStorage::ColumnSizeByName StorageMaterializedMySQL::getColumnSizes() const { auto sizes = nested_storage->getColumnSizes(); diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index 2cd589bfd75..9f5d157ce3b 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -34,7 +34,6 @@ public: SinkToStoragePtr write(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, bool) override { throwNotAllowed(); } - NamesAndTypesList getVirtuals() const override; ColumnSizeByName getColumnSizes() const override; StoragePtr getNested() const override { return nested_storage; } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 1d0898a2f11..02cba1cf753 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -171,6 +171,12 @@ QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage( return getTargetTable()->getQueryProcessingStage(local_context, to_stage, getTargetTable()->getStorageSnapshot(target_metadata, local_context), query_info); } +StorageSnapshotPtr StorageMaterializedView::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr) const +{ + /// We cannot set virtuals at table creation because target table may not exist at that time. + return std::make_shared(*this, metadata_snapshot, getTargetTable()->getVirtualsPtr()); +} + void StorageMaterializedView::read( QueryPlan & query_plan, const Names & column_names, @@ -536,11 +542,6 @@ StoragePtr StorageMaterializedView::tryGetTargetTable() const return DatabaseCatalog::instance().tryGetTable(getTargetTableId(), getContext()); } -NamesAndTypesList StorageMaterializedView::getVirtuals() const -{ - return getTargetTable()->getVirtuals(); -} - Strings StorageMaterializedView::getDataPaths() const { if (auto table = tryGetTargetTable()) diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 4d574a821ec..198b7a642ee 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -75,12 +75,11 @@ public: StoragePtr tryGetTargetTable() const; StorageID getTargetTableId() const; - /// Get the virtual column of the target table; - NamesAndTypesList getVirtuals() const override; - ActionLock getActionLock(StorageActionBlockType type) override; void onActionLockRemove(StorageActionBlockType action_type) override; + StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr) const override; + void read( QueryPlan & query_plan, const Names & column_names, diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index eb45a8fac66..8410f0a8df8 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -147,6 +147,7 @@ StorageMerge::StorageMerge( storage_metadata.setColumns(columns_.empty() ? getColumnsDescriptionFromSourceTables() : columns_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); } StorageMerge::StorageMerge( @@ -169,6 +170,7 @@ StorageMerge::StorageMerge( storage_metadata.setColumns(columns_.empty() ? 
getColumnsDescriptionFromSourceTables() : columns_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); } StorageMerge::DatabaseTablesIterators StorageMerge::getDatabaseIterators(ContextPtr context_) const @@ -321,6 +323,37 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( return selected_table_size == 1 ? stage_in_source_tables : std::min(stage_in_source_tables, QueryProcessingStage::WithMergeableState); } +VirtualColumnsDescription StorageMerge::createVirtuals() +{ + VirtualColumnsDescription desc; + + desc.addEphemeral("_database", std::make_shared(std::make_shared()), ""); + desc.addEphemeral("_table", std::make_shared(std::make_shared()), ""); + + return desc; +} + +StorageSnapshotPtr StorageMerge::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr) const +{ + static const auto common_virtuals = createVirtuals(); + + auto virtuals = common_virtuals; + if (auto first_table = getFirstTable([](auto && table) { return table; })) + { + auto table_virtuals = first_table->getVirtualsPtr(); + for (const auto & column : *table_virtuals) + { + if (virtuals.has(column.name)) + continue; + + virtuals.add(column); + } + } + + auto virtuals_ptr = std::make_shared(std::move(virtuals)); + return std::make_shared(*this, metadata_snapshot, std::move(virtuals_ptr)); +} + void StorageMerge::read( QueryPlan & query_plan, const Names & column_names, @@ -912,7 +945,6 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ column_node = std::make_shared(NameAndTypePair{column, storage_columns.getColumn(get_column_options, column).type }, modified_query_info.table_expression); } - PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, false /*use_column_identifier_as_action_node_name*/); actions_visitor.visit(filter_actions_dag, column_node); } @@ -1015,7 +1047,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( Block pipe_header = builder->getHeader(); - if (has_database_virtual_column && !pipe_header.has("_database")) + if (has_database_virtual_column && common_header.has("_database") && !pipe_header.has("_database")) { ColumnWithTypeAndName column; column.name = "_database"; @@ -1030,7 +1062,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( { return std::make_shared(stream_header, adding_column_actions); }); } - if (has_table_virtual_column && !pipe_header.has("_table")) + if (has_table_virtual_column && common_header.has("_table") && !pipe_header.has("_table")) { ColumnWithTypeAndName column; column.name = "_table"; @@ -1390,6 +1422,7 @@ void StorageMerge::alter( params.apply(storage_metadata, local_context); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, storage_metadata); setInMemoryMetadata(storage_metadata); + setVirtuals(createVirtuals()); } void ReadFromMerge::convertAndFilterSourceStream( @@ -1649,20 +1682,4 @@ void registerStorageMerge(StorageFactory & factory) }); } -NamesAndTypesList StorageMerge::getVirtuals() const -{ - NamesAndTypesList virtuals{ - {"_database", std::make_shared(std::make_shared())}, - {"_table", std::make_shared(std::make_shared())}}; - - auto first_table = getFirstTable([](auto && table) { return table; }); - if (first_table) - { - auto table_virtuals = first_table->getVirtuals(); - virtuals.insert(virtuals.end(), table_virtuals.begin(), table_virtuals.end()); - } - - return virtuals; -} - } diff --git a/src/Storages/StorageMerge.h 
b/src/Storages/StorageMerge.h index 8b062a392d4..556649f622d 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -55,6 +55,8 @@ public: QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; + StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr) const override; + void read( QueryPlan & query_plan, const Names & column_names, @@ -116,11 +118,12 @@ private: template void forEachTable(F && func) const; - NamesAndTypesList getVirtuals() const override; ColumnSizeByName getColumnSizes() const override; ColumnsDescription getColumnsDescriptionFromSourceTables() const; + static VirtualColumnsDescription createVirtuals(); + bool tableSupportsPrewhere() const; template @@ -281,6 +284,8 @@ private: ContextPtr query_context, bool filter_by_database_virtual_column, bool filter_by_table_virtual_column) const; + + // static VirtualColumnsDescription createVirtuals(StoragePtr first_table); }; } diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 18a1f9086ae..a8e7fd528dd 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -30,7 +30,6 @@ public: bool hasEvenlyDistributedRead() const override { return getNested()->hasEvenlyDistributedRead(); } ColumnSizeByName getColumnSizes() const override { return getNested()->getColumnSizes(); } - NamesAndTypesList getVirtuals() const override { return getNested()->getVirtuals(); } QueryProcessingStage::Enum getQueryProcessingStage( ContextPtr context, diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index b73965b8774..72bbcdd3ea8 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -161,7 +161,7 @@ public: , num_streams(num_streams_) { query_configuration = storage.updateConfigurationAndGetCopy(context); - virtual_columns = storage.getVirtuals(); + virtual_columns = storage.getVirtualsList(); } private: @@ -1084,8 +1084,7 @@ StorageS3::StorageS3( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } static std::shared_ptr createFileIterator( @@ -1152,7 +1151,7 @@ void StorageS3::read( size_t max_block_size, size_t num_streams) { - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), virtual_columns); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; @@ -1985,16 +1984,6 @@ void registerStorageOSS(StorageFactory & factory) return registerStorageS3Impl("OSS", factory); } -NamesAndTypesList StorageS3::getVirtuals() const -{ - return virtual_columns; -} - -Names StorageS3::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - bool StorageS3::supportsPartitionBy() const { return true; diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 65fb3b51be2..7c5db76aadf 100644 --- a/src/Storages/StorageS3.h +++ 
b/src/Storages/StorageS3.h @@ -336,9 +336,6 @@ public: void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; - NamesAndTypesList getVirtuals() const override; - static Names getVirtualColumnNames(); - bool supportsPartitionBy() const override; static void processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection); @@ -378,7 +375,6 @@ private: Configuration configuration; std::mutex configuration_update_mutex; - NamesAndTypesList virtual_columns; String name; const bool distributed_processing; diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 31c241a5b13..6b22771b38f 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -67,8 +67,7 @@ StorageS3Cluster::StorageS3Cluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageS3Cluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) @@ -92,7 +91,7 @@ void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { auto iterator = std::make_shared( - *s3_configuration.client, s3_configuration.url, predicate, virtual_columns, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback()); + *s3_configuration.client, s3_configuration.url, predicate, getVirtualsList(), context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { @@ -103,12 +102,6 @@ RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(const return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; } -NamesAndTypesList StorageS3Cluster::getVirtuals() const -{ - return virtual_columns; -} - - } #endif diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index 03155b6e707..6a5b03e682f 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -28,8 +28,6 @@ public: std::string getName() const override { return "S3Cluster"; } - NamesAndTypesList getVirtuals() const override; - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; bool supportsSubcolumns() const override { return true; } @@ -45,7 +43,6 @@ private: void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; StorageS3::Configuration s3_configuration; - NamesAndTypesList virtual_columns; }; diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 34c092c7208..8b087a4a2bc 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -1,6 +1,4 @@ #include -#include -#include #include #include #include @@ -18,6 +16,49 @@ namespace ErrorCodes extern const int COLUMN_QUERIED_MORE_THAN_ONCE; } +StorageSnapshot::StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_) + 
: storage(storage_) + , metadata(std::move(metadata_)) + , virtual_columns(storage_.getVirtualsPtr()) +{ +} + +StorageSnapshot::StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_, + VirtualsDescriptionPtr virtual_columns_) + : storage(storage_) + , metadata(std::move(metadata_)) + , virtual_columns(std::move(virtual_columns_)) +{ +} + +StorageSnapshot::StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_, + ColumnsDescription object_columns_) + : storage(storage_) + , metadata(std::move(metadata_)) + , virtual_columns(storage_.getVirtualsPtr()) + , object_columns(std::move(object_columns_)) +{ +} + +StorageSnapshot::StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_, + ColumnsDescription object_columns_, + DataPtr data_) + : storage(storage_) + , metadata(std::move(metadata_)) + , virtual_columns(storage_.getVirtualsPtr()) + , object_columns(std::move(object_columns_)) + , data(std::move(data_)) +{ +} + std::shared_ptr StorageSnapshot::clone(DataPtr data_) const { auto res = std::make_shared(storage, metadata, object_columns); @@ -28,15 +69,12 @@ std::shared_ptr StorageSnapshot::clone(DataPtr data_) const return res; } -void StorageSnapshot::init() +ColumnsDescription StorageSnapshot::getAllColumnsDescription() const { - for (const auto & [name, type] : storage.getVirtuals()) - virtual_columns[name] = type; + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); + auto column_names_and_types = getColumns(get_column_options); - if (storage.hasLightweightDeletedMask()) - system_columns[LightweightDeleteDescription::FILTER_COLUMN.name] = LightweightDeleteDescription::FILTER_COLUMN.type; - - system_columns[BlockNumberColumn::name] = BlockNumberColumn::type; + return ColumnsDescription{column_names_and_types}; } NamesAndTypesList StorageSnapshot::getColumns(const GetColumnsOptions & options) const @@ -46,35 +84,22 @@ NamesAndTypesList StorageSnapshot::getColumns(const GetColumnsOptions & options) if (options.with_extended_objects) extendObjectColumns(all_columns, object_columns, options.with_subcolumns); - NameSet column_names; - if (options.with_virtuals) + if (options.virtuals_kind != VirtualsKind::None && !virtual_columns->empty()) { - /// Virtual columns must be appended after ordinary, - /// because user can override them. 
- if (!virtual_columns.empty()) + NameSet column_names; + for (const auto & column : all_columns) + column_names.insert(column.name); + + auto virtuals_list = virtual_columns->getNamesAndTypesList(options.virtuals_kind); + for (const auto & column : virtuals_list) { - for (const auto & column : all_columns) - column_names.insert(column.name); + if (column_names.contains(column.name)) + continue; - for (const auto & [name, type] : virtual_columns) - if (!column_names.contains(name)) - all_columns.emplace_back(name, type); + all_columns.emplace_back(column.name, column.type); } } - if (options.with_system_columns) - { - if (!system_columns.empty() && column_names.empty()) - { - for (const auto & column : all_columns) - column_names.insert(column.name); - } - - for (const auto & [name, type] : system_columns) - if (!column_names.contains(name)) - all_columns.emplace_back(name, type); - } - return all_columns; } @@ -100,18 +125,11 @@ std::optional StorageSnapshot::tryGetColumn(const GetColumnsOpt return object_column; } - if (options.with_virtuals) + if (options.virtuals_kind != VirtualsKind::None) { - auto it = virtual_columns.find(column_name); - if (it != virtual_columns.end()) - return NameAndTypePair(column_name, it->second); - } - - if (options.with_system_columns) - { - auto it = system_columns.find(column_name); - if (it != system_columns.end()) - return NameAndTypePair(column_name, it->second); + auto virtual_column = virtual_columns->tryGet(column_name, options.virtuals_kind); + if (virtual_column) + return NameAndTypePair{virtual_column->name, virtual_column->type}; } return {}; @@ -126,6 +144,47 @@ NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, co return *column; } +CompressionCodecPtr StorageSnapshot::getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const +{ + auto get_codec_or_default = [&](const auto & column_desc) + { + return column_desc.codec + ? CompressionCodecFactory::instance().get(column_desc.codec, column_desc.type, default_codec) + : default_codec; + }; + + const auto & columns = metadata->getColumns(); + if (const auto * column_desc = columns.tryGet(column_name)) + return get_codec_or_default(*column_desc); + + if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name)) + return get_codec_or_default(*virtual_desc); + + return default_codec; +} + +CompressionCodecPtr StorageSnapshot::getCodecOrDefault(const String & column_name) const +{ + return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec()); +} + +ASTPtr StorageSnapshot::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const +{ + auto get_codec_or_default = [&](const auto & column_desc) + { + return column_desc.codec ? 
column_desc.codec : default_codec->getFullCodecDesc(); + }; + + const auto & columns = metadata->getColumns(); + if (const auto * column_desc = columns.tryGet(column_name)) + return get_codec_or_default(*column_desc); + + if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name)) + return get_codec_or_default(*virtual_desc); + + return default_codec->getFullCodecDesc(); +} + Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const { Block res; @@ -143,11 +202,11 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) cons { res.insert({object_column->type->createColumn(), object_column->type, column_name}); } - else if (auto it = virtual_columns.find(column_name); it != virtual_columns.end()) + else if (auto virtual_column = virtual_columns->tryGet(column_name)) { /// Virtual columns must be appended after ordinary, because user can /// override them. - const auto & type = it->second; + const auto & type = virtual_column->type; res.insert({type->createColumn(), type, column_name}); } else @@ -175,12 +234,11 @@ ColumnsDescription StorageSnapshot::getDescriptionForColumns(const Names & colum { res.add(*object_column, "", false, false); } - else if (auto it = virtual_columns.find(name); it != virtual_columns.end()) + else if (auto virtual_column = virtual_columns->tryGet(name)) { /// Virtual columns must be appended after ordinary, because user can /// override them. - const auto & type = it->second; - res.add({name, type}); + res.add({name, virtual_column->type}); } else { @@ -216,7 +274,7 @@ void StorageSnapshot::check(const Names & column_names) const { bool has_column = columns.hasColumnOrSubcolumn(GetColumnsOptions::AllPhysical, name) || object_columns.hasColumnOrSubcolumn(GetColumnsOptions::AllPhysical, name) - || virtual_columns.contains(name); + || virtual_columns->has(name); if (!has_column) { diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index d62e118e1f2..89e97f2abb8 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -1,10 +1,14 @@ #pragma once #include +#include namespace DB { class IStorage; +class ICompressionCodec; + +using CompressionCodecPtr = std::shared_ptr; /// Snapshot of storage that fixes set columns that can be read in query. /// There are 3 sources of columns: regular columns from metadata, @@ -13,6 +17,7 @@ struct StorageSnapshot { const IStorage & storage; const StorageMetadataPtr metadata; + const VirtualsDescriptionPtr virtual_columns; const ColumnsDescription object_columns; /// Additional data, on which set of columns may depend. 
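With the virtual-column descriptions attached to StorageSnapshot, codec lookup no longer needs per-column special cases (note the BlockNumberColumn branch deleted from StorageLog earlier in this diff): getCodecOrDefault consults the table's column description first, then the virtual column's description, then the default codec. A stand-in sketch of that lookup order follows, with simplified types assumed in place of ColumnsDescription and CompressionCodecPtr.

    #include <optional>
    #include <string>
    #include <unordered_map>

    // Simplified stand-ins: the real code maps names to column descriptions that
    // may carry a codec AST and returns a CompressionCodecPtr.
    struct ColumnDesc { std::optional<std::string> codec; };
    using Columns = std::unordered_map<std::string, ColumnDesc>;

    struct SnapshotSketch
    {
        Columns table_columns;    // from the metadata snapshot
        Columns virtual_columns;  // from the VirtualColumnsDescription

        // Mirrors the lookup order of StorageSnapshot::getCodecOrDefault.
        std::string getCodecOrDefault(const std::string & name, const std::string & default_codec) const
        {
            // 1. A codec declared on a regular table column wins.
            if (auto it = table_columns.find(name); it != table_columns.end() && it->second.codec)
                return *it->second.codec;
            // 2. Otherwise a persistent virtual column may carry its own codec,
            //    e.g. _block_number is declared above with Delta + LZ4.
            if (auto it = virtual_columns.find(name); it != virtual_columns.end() && it->second.codec)
                return *it->second.codec;
            // 3. Fall back to the server-wide default.
            return default_codec;
        }
    };

    int main()
    {
        SnapshotSketch snapshot{{}, {{"_block_number", {"CODEC(Delta, LZ4)"}}}};
        auto codec = snapshot.getCodecOrDefault("_block_number", "CODEC(LZ4)"); // "CODEC(Delta, LZ4)"
        (void)codec;
    }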
@@ -30,38 +35,29 @@ struct StorageSnapshot StorageSnapshot( const IStorage & storage_, - StorageMetadataPtr metadata_) - : storage(storage_), metadata(std::move(metadata_)) - { - init(); - } + StorageMetadataPtr metadata_); StorageSnapshot( const IStorage & storage_, StorageMetadataPtr metadata_, - ColumnsDescription object_columns_) - : storage(storage_) - , metadata(std::move(metadata_)) - , object_columns(std::move(object_columns_)) - { - init(); - } + VirtualsDescriptionPtr virtual_columns_); + + StorageSnapshot( + const IStorage & storage_, + StorageMetadataPtr metadata_, + ColumnsDescription object_columns_); StorageSnapshot( const IStorage & storage_, StorageMetadataPtr metadata_, ColumnsDescription object_columns_, - DataPtr data_) - : storage(storage_) - , metadata(std::move(metadata_)) - , object_columns(std::move(object_columns_)) - , data(std::move(data_)) - { - init(); - } + DataPtr data_); std::shared_ptr clone(DataPtr data_) const; + /// Get columns description + ColumnsDescription getAllColumnsDescription() const; + /// Get all available columns with types according to options. NamesAndTypesList getColumns(const GetColumnsOptions & options) const; @@ -72,6 +68,10 @@ struct StorageSnapshot std::optional tryGetColumn(const GetColumnsOptions & options, const String & column_name) const; NameAndTypePair getColumn(const GetColumnsOptions & options, const String & column_name) const; + CompressionCodecPtr getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; + CompressionCodecPtr getCodecOrDefault(const String & column_name) const; + ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; + /// Block with ordinary + materialized + aliases + virtuals + subcolumns. Block getSampleBlockForColumns(const Names & column_names) const; @@ -87,15 +87,6 @@ struct StorageSnapshot /// If we have a projection then we should use its metadata. StorageMetadataPtr getMetadataForQuery() const { return projection ? projection->metadata : metadata; } - -private: - void init(); - - std::unordered_map virtual_columns; - - /// System columns are not visible in the schema but might be persisted in the data. - /// One example of such column is lightweight delete mask '_row_exists'. 
- std::unordered_map system_columns; }; using StorageSnapshotPtr = std::shared_ptr; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 40acee57e7c..039be222e7e 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -152,8 +152,7 @@ IStorageURLBase::IStorageURLBase( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } @@ -1059,7 +1058,7 @@ void IStorageURLBase::read( size_t num_streams) { auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; @@ -1127,7 +1126,7 @@ void ReadFromURL::createIterator(const ActionsDAG::Node * predicate) else if (is_url_with_globs) { /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uri, max_addresses, predicate, storage->virtual_columns, context); + auto glob_iterator = std::make_shared(storage->uri, max_addresses, predicate, storage->getVirtualsList(), context); /// check if we filtered out all the paths if (glob_iterator->size() == 0) @@ -1230,7 +1229,7 @@ void StorageURLWithFailover::read( size_t num_streams) { auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; @@ -1303,16 +1302,6 @@ SinkToStoragePtr IStorageURLBase::write(const ASTPtr & query, const StorageMetad } } -NamesAndTypesList IStorageURLBase::getVirtuals() const -{ - return virtual_columns; -} - -Names IStorageURLBase::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - SchemaCache & IStorageURLBase::getSchemaCache(const ContextPtr & context) { static SchemaCache schema_cache(context->getConfigRef().getUInt("schema_inference_cache_max_elements_for_url", DEFAULT_SCHEMA_CACHE_ELEMENTS)); diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 480670475dd..294b1f828bb 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -48,9 +48,6 @@ public: bool supportsPartitionBy() const override { return true; } - NamesAndTypesList getVirtuals() const override; - static Names getVirtualColumnNames(); - static ColumnsDescription getTableStructureFromData( const String & format, const String & uri, @@ -106,8 +103,6 @@ protected: ASTPtr partition_by; bool distributed_processing; - 
NamesAndTypesList virtual_columns; - virtual std::string getReadMethod() const; virtual std::vector> getReadURIParams( diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index d0df74d7521..2e7c63d0097 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -75,8 +75,7 @@ StorageURLCluster::StorageURLCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); } void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) @@ -91,7 +90,7 @@ void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageS RemoteQueryExecutor::Extension StorageURLCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { - auto iterator = std::make_shared(uri, context->getSettingsRef().glob_expansion_max_elements, predicate, virtual_columns, context); + auto iterator = std::make_shared(uri, context->getSettingsRef().glob_expansion_max_elements, predicate, getVirtualsList(), context); auto callback = std::make_shared([iter = std::move(iterator)]() mutable -> String { return iter->next(); }); return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; } diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index f57d262f434..dce2e0106ea 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -31,8 +31,6 @@ public: std::string getName() const override { return "URLCluster"; } - NamesAndTypesList getVirtuals() const override { return virtual_columns; } - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; bool supportsSubcolumns() const override { return true; } @@ -44,7 +42,6 @@ private: String uri; String format_name; - NamesAndTypesList virtual_columns; }; diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index 300b11b7346..a930ffd1307 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -12,12 +12,13 @@ StorageValues::StorageValues( const StorageID & table_id_, const ColumnsDescription & columns_, const Block & res_block_, - const NamesAndTypesList & virtuals_) - : IStorage(table_id_), res_block(res_block_), virtuals(virtuals_) + VirtualColumnsDescription virtuals_) + : IStorage(table_id_), res_block(res_block_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); + setVirtuals(std::move(virtuals_)); } Pipe StorageValues::read( diff --git a/src/Storages/StorageValues.h b/src/Storages/StorageValues.h index af1c134077b..ebb182ab667 100644 --- a/src/Storages/StorageValues.h +++ b/src/Storages/StorageValues.h @@ -11,8 +11,14 @@ namespace DB class StorageValues final : public IStorage { public: + /// Why might we have virtual columns in a storage created from a single block? + /// Because it is used as a temporary storage for pushing blocks into views, and some + /// views may contain virtual columns from the original storage.
StorageValues( - const StorageID & table_id_, const ColumnsDescription & columns_, const Block & res_block_, const NamesAndTypesList & virtuals_ = {}); + const StorageID & table_id_, + const ColumnsDescription & columns_, + const Block & res_block_, + VirtualColumnsDescription virtuals_ = {}); std::string getName() const override { return "Values"; } @@ -25,13 +31,6 @@ public: size_t max_block_size, size_t num_streams) override; - /// Why we may have virtual columns in the storage from a single block? - /// Because it used as tmp storage for pushing blocks into views, and some - /// views may contain virtual columns from original storage. - NamesAndTypesList getVirtuals() const override - { - return virtuals; - } /// FIXME probably it should return false, but StorageValues is used in ExecutingInnerQueryFromViewTransform (whatever it is) bool supportsTransactions() const override { return true; } @@ -40,7 +39,6 @@ public: private: Block res_block; - NamesAndTypesList virtuals; }; } diff --git a/src/Storages/System/IStorageSystemOneBlock.cpp b/src/Storages/System/IStorageSystemOneBlock.cpp index 20c8f3517ee..53399654c8d 100644 --- a/src/Storages/System/IStorageSystemOneBlock.cpp +++ b/src/Storages/System/IStorageSystemOneBlock.cpp @@ -56,10 +56,9 @@ void IStorageSystemOneBlock::read( QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, size_t /*num_streams*/) - { storage_snapshot->check(column_names); - Block sample_block = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + Block sample_block = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtualsList()); std::vector columns_mask; if (supportsColumnsMask()) diff --git a/src/Storages/System/StorageSystemCertificates.cpp b/src/Storages/System/StorageSystemCertificates.cpp index 0e4c5648b74..5ec683e1784 100644 --- a/src/Storages/System/StorageSystemCertificates.cpp +++ b/src/Storages/System/StorageSystemCertificates.cpp @@ -19,19 +19,18 @@ namespace DB ColumnsDescription StorageSystemCertificates::getColumnsDescription() { - /// TODO: Fill in all the comments. return ColumnsDescription { - {"version", std::make_shared>()}, - {"serial_number", std::make_shared(std::make_shared())}, - {"signature_algo", std::make_shared(std::make_shared())}, - {"issuer", std::make_shared(std::make_shared())}, - {"not_before", std::make_shared(std::make_shared())}, - {"not_after", std::make_shared(std::make_shared())}, - {"subject", std::make_shared(std::make_shared())}, - {"pkey_algo", std::make_shared(std::make_shared())}, - {"path", std::make_shared()}, - {"default", std::make_shared>()} + {"version", std::make_shared>(), "Version of the certificate. 
Values are 0 for v1, 1 for v2, 2 for v3."}, + {"serial_number", std::make_shared(std::make_shared()), "Serial Number of the certificate assigned by the issuer."}, + {"signature_algo", std::make_shared(std::make_shared()), "Signature Algorithm - an algorithm used by the issuer to sign this certificate."}, + {"issuer", std::make_shared(std::make_shared()), "Issuer - a unique identifier for the Certificate Authority issuing this certificate."}, + {"not_before", std::make_shared(std::make_shared()), "The beginning of the time window when this certificate is valid."}, + {"not_after", std::make_shared(std::make_shared()), "The end of the time window when this certificate is valid."}, + {"subject", std::make_shared(std::make_shared()), "Subject - identifies the owner of the public key."}, + {"pkey_algo", std::make_shared(std::make_shared()), "Public Key Algorithm defines the algorithm the public key can be used with."}, + {"path", std::make_shared(), "Path to the file or directory containing this certificate."}, + {"default", std::make_shared>(), "Certificate is in the default certificate location."} }; } diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 6bc1208a6a9..5c96c6502af 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -31,27 +31,32 @@ StorageSystemColumns::StorageSystemColumns(const StorageID & table_id_) StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - { "database", std::make_shared() }, - { "table", std::make_shared() }, - { "name", std::make_shared() }, - { "type", std::make_shared() }, - { "position", std::make_shared() }, - { "default_kind", std::make_shared() }, - { "default_expression", std::make_shared() }, - { "data_compressed_bytes", std::make_shared() }, - { "data_uncompressed_bytes", std::make_shared() }, - { "marks_bytes", std::make_shared() }, - { "comment", std::make_shared() }, - { "is_in_partition_key", std::make_shared() }, - { "is_in_sorting_key", std::make_shared() }, - { "is_in_primary_key", std::make_shared() }, - { "is_in_sampling_key", std::make_shared() }, - { "compression_codec", std::make_shared() }, - { "character_octet_length", std::make_shared(std::make_shared()) }, - { "numeric_precision", std::make_shared(std::make_shared()) }, - { "numeric_precision_radix", std::make_shared(std::make_shared()) }, - { "numeric_scale", std::make_shared(std::make_shared()) }, - { "datetime_precision", std::make_shared(std::make_shared()) }, + { "database", std::make_shared(), "Database name."}, + { "table", std::make_shared(), "Table name."}, + { "name", std::make_shared(), "Column name."}, + { "type", std::make_shared(), "Column type."}, + { "position", std::make_shared(), "Ordinal position of a column in a table starting with 1."}, + { "default_kind", std::make_shared(), "Expression type (DEFAULT, MATERIALIZED, ALIAS) for the default value, or an empty string if it is not defined."}, + { "default_expression", std::make_shared(), "Expression for the default value, or an empty string if it is not defined."}, + { "data_compressed_bytes", std::make_shared(), "The size of compressed data, in bytes."}, + { "data_uncompressed_bytes", std::make_shared(), "The size of decompressed data, in bytes."}, + { "marks_bytes", std::make_shared(), "The size of marks, in bytes."}, + { "comment", std::make_shared(), "Comment on the column, or an empty string if it is not defined."}, + { "is_in_partition_key",
std::make_shared(), "Flag that indicates whether the column is in the partition expression."}, + { "is_in_sorting_key", std::make_shared(), "Flag that indicates whether the column is in the sorting key expression."}, + { "is_in_primary_key", std::make_shared(), "Flag that indicates whether the column is in the primary key expression."}, + { "is_in_sampling_key", std::make_shared(), "Flag that indicates whether the column is in the sampling key expression."}, + { "compression_codec", std::make_shared(), "Compression codec name."}, + { "character_octet_length", std::make_shared(std::make_shared()), + "Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse it makes sense only for the FixedString data type. Otherwise, the NULL value is returned."}, + { "numeric_precision", std::make_shared(std::make_shared()), + "Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is the bit width for integer types and the decimal precision for Decimal types. Otherwise, the NULL value is returned."}, + { "numeric_precision_radix", std::make_shared(std::make_shared()), + "The base of the number system in which the precision of approximate numeric data, exact numeric data, integer data or monetary data is expressed. In ClickHouse it's 2 for integer types and 10 for Decimal types. Otherwise, the NULL value is returned."}, + { "numeric_scale", std::make_shared(std::make_shared()), + "The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it makes sense only for Decimal types. Otherwise, the NULL value is returned."}, + { "datetime_precision", std::make_shared(std::make_shared()), + "Decimal precision of the DateTime64 data type. For other data types, the NULL value is returned."}, })); setInMemoryMetadata(storage_metadata); diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index 2fa74ef23e6..ff782647c79 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -26,16 +26,16 @@ StorageSystemDataSkippingIndices::StorageSystemDataSkippingIndices(const Storage StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - { "database", std::make_shared() }, - { "table", std::make_shared() }, - { "name", std::make_shared() }, - { "type", std::make_shared() }, - { "type_full", std::make_shared() }, - { "expr", std::make_shared() }, - { "granularity", std::make_shared() }, - { "data_compressed_bytes", std::make_shared() }, - { "data_uncompressed_bytes", std::make_shared() }, - { "marks", std::make_shared()} + { "database", std::make_shared(), "Database name."}, + { "table", std::make_shared(), "Table name."}, + { "name", std::make_shared(), "Index name."}, + { "type", std::make_shared(), "Index type."}, + { "type_full", std::make_shared(), "Index type expression from the CREATE statement."}, + { "expr", std::make_shared(), "Expression for the index calculation."}, + { "granularity", std::make_shared(), "The number of granules in the block."}, + { "data_compressed_bytes", std::make_shared(), "The size of compressed data, in bytes."}, + { "data_uncompressed_bytes", std::make_shared(), "The size of decompressed data, in bytes."}, + { "marks", std::make_shared(), "The size of marks, in bytes."} })); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp
b/src/Storages/System/StorageSystemDetachedParts.cpp index fa74093a5a5..1eb79744022 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -271,18 +271,18 @@ StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_i { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription{{ - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"partition_id", std::make_shared(std::make_shared())}, - {"name", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"modification_time",std::make_shared()}, - {"disk", std::make_shared()}, - {"path", std::make_shared()}, - {"reason", std::make_shared(std::make_shared())}, - {"min_block_number", std::make_shared(std::make_shared())}, - {"max_block_number", std::make_shared(std::make_shared())}, - {"level", std::make_shared(std::make_shared())}, + {"database", std::make_shared(), "The name of the database this part belongs to."}, + {"table", std::make_shared(), "The name of the table this part belongs to."}, + {"partition_id", std::make_shared(std::make_shared()), "The identifier of the partition this part belongs to."}, + {"name", std::make_shared(), "The name of the part."}, + {"bytes_on_disk", std::make_shared(), "Total size of all the data part files in bytes."}, + {"modification_time",std::make_shared(), "The time the directory with the data part was modified. This usually corresponds to the time when detach happened."}, + {"disk", std::make_shared(), "The name of the disk that stores this data part."}, + {"path", std::make_shared(), "The path of the disk to the file of this data part."}, + {"reason", std::make_shared(std::make_shared()), "The explanation why this part was detached."}, + {"min_block_number", std::make_shared(std::make_shared()), "The minimum number of data parts that make up the current part after merging."}, + {"max_block_number", std::make_shared(std::make_shared()), "The maximum number of data parts that make up the current part after merging."}, + {"level", std::make_shared(std::make_shared()), "Depth of the merge tree. 
Zero means that the current part was created by insert rather than by merging other parts."}, }}); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemDictionaries.cpp b/src/Storages/System/StorageSystemDictionaries.cpp index b3a60e16d05..8b528b4a298 100644 --- a/src/Storages/System/StorageSystemDictionaries.cpp +++ b/src/Storages/System/StorageSystemDictionaries.cpp @@ -16,7 +16,6 @@ #include #include -#include namespace DB { @@ -52,6 +51,14 @@ catch (const DB::Exception &) } +StorageSystemDictionaries::StorageSystemDictionaries(const StorageID & storage_id_, ColumnsDescription columns_description_) + : IStorageSystemOneBlock(storage_id_, std::move(columns_description_)) +{ + VirtualColumnsDescription virtuals; + virtuals.addEphemeral("key", std::make_shared(), ""); + setVirtuals(std::move(virtuals)); +} + ColumnsDescription StorageSystemDictionaries::getColumnsDescription() { return ColumnsDescription @@ -75,7 +82,7 @@ ColumnsDescription StorageSystemDictionaries::getColumnsDescription() {"attribute.names", std::make_shared(std::make_shared()), "Array of attribute names provided by the dictionary."}, {"attribute.types", std::make_shared(std::make_shared()), "Corresponding array of attribute types provided by the dictionary."}, {"bytes_allocated", std::make_shared(), "Amount of RAM allocated for the dictionary."}, - {"hierarchical_index_bytes_allocated", std::make_shared(), ""}, + {"hierarchical_index_bytes_allocated", std::make_shared(), "Amount of RAM allocated for hierarchical index."}, {"query_count", std::make_shared(), "Number of queries since the dictionary was loaded or since the last successful reboot."}, {"hit_rate", std::make_shared(), "For cache dictionaries, the percentage of uses for which the value was in the cache."}, {"found_rate", std::make_shared(), "The percentage of uses for which the value was found."}, @@ -92,13 +99,6 @@ ColumnsDescription StorageSystemDictionaries::getColumnsDescription() }; } -NamesAndTypesList StorageSystemDictionaries::getVirtuals() const -{ - return { - {"key", std::make_shared()} - }; -} - void StorageSystemDictionaries::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const { const auto access = context->getAccess(); diff --git a/src/Storages/System/StorageSystemDictionaries.h b/src/Storages/System/StorageSystemDictionaries.h index 1a071fda872..058b8b163d9 100644 --- a/src/Storages/System/StorageSystemDictionaries.h +++ b/src/Storages/System/StorageSystemDictionaries.h @@ -12,15 +12,13 @@ class Context; class StorageSystemDictionaries final : public IStorageSystemOneBlock { public: + StorageSystemDictionaries(const StorageID & storage_id_, ColumnsDescription columns_description_); + std::string getName() const override { return "SystemDictionaries"; } static ColumnsDescription getColumnsDescription(); - NamesAndTypesList getVirtuals() const override; - protected: - using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const override; }; diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 0f8a6640f2c..f67d4f7acd0 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -18,21 +18,21 @@ StorageSystemDisks::StorageSystemDisks(const StorageID & table_id_) StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - {"name", 
std::make_shared()}, - {"path", std::make_shared()}, - {"free_space", std::make_shared()}, - {"total_space", std::make_shared()}, - {"unreserved_space", std::make_shared()}, - {"keep_free_space", std::make_shared()}, - {"type", std::make_shared()}, - {"object_storage_type", std::make_shared()}, - {"metadata_type", std::make_shared()}, - {"is_encrypted", std::make_shared()}, - {"is_read_only", std::make_shared()}, - {"is_write_once", std::make_shared()}, - {"is_remote", std::make_shared()}, - {"is_broken", std::make_shared()}, - {"cache_path", std::make_shared()}, + {"name", std::make_shared(), "Name of a disk in the server configuration."}, + {"path", std::make_shared(), "Path to the mount point in the file system."}, + {"free_space", std::make_shared(), "Free space on disk in bytes."}, + {"total_space", std::make_shared(), "Disk volume in bytes."}, + {"unreserved_space", std::make_shared(), "Free space which is not taken by reservations (free_space minus the size of reservations taken by merges, inserts, and other disk write operations currently running)."}, + {"keep_free_space", std::make_shared(), "Amount of disk space that should stay free on disk in bytes. Defined in the keep_free_space_bytes parameter of disk configuration."}, + {"type", std::make_shared(), "The disk type which tells where this disk stores the data - RAM, local drive or remote storage."}, + {"object_storage_type", std::make_shared(), "Type of object storage if disk type is object_storage."}, + {"metadata_type", std::make_shared(), "Type of metadata storage if disk type is object_storage."}, + {"is_encrypted", std::make_shared(), "Flag which shows whether this disk encrypts the underlying data."}, + {"is_read_only", std::make_shared(), "Flag which indicates that you can only perform read operations with this disk."}, + {"is_write_once", std::make_shared(), "Flag which indicates if the disk is write-once, meaning that it supports BACKUP to this disk but does not support INSERT into a MergeTree table on this disk."}, + {"is_remote", std::make_shared(), "Flag which indicates whether operations with this disk involve network interaction."}, + {"is_broken", std::make_shared(), "Flag which indicates if the disk is broken. Broken disks will have 0 space and cannot be used."}, + {"cache_path", std::make_shared(), "The path to the cache directory on a local drive in case the disk supports caching."}, })); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp index 53cd76e4219..cfb388bc232 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -17,21 +17,21 @@ ColumnsDescription StorageSystemFilesystemCache::getColumnsDescription() /// TODO: Fill in all the comments. return ColumnsDescription { - {"cache_name", std::make_shared()}, - {"cache_base_path", std::make_shared()}, - {"cache_path", std::make_shared()}, - {"key", std::make_shared()}, - {"file_segment_range_begin", std::make_shared()}, - {"file_segment_range_end", std::make_shared()}, - {"size", std::make_shared()}, - {"state", std::make_shared()}, - {"cache_hits", std::make_shared()}, - {"references", std::make_shared()}, - {"downloaded_size", std::make_shared()}, - {"kind", std::make_shared()}, - {"unbound", std::make_shared>()}, - {"user_id", std::make_shared()}, - {"file_size", std::make_shared(std::make_shared())}, + {"cache_name", std::make_shared(), "Name of the cache object.
Can be used in `SYSTEM DESCRIBE FILESYSTEM CACHE `, `SYSTEM DROP FILESYSTEM CACHE ` commands"}, + {"cache_base_path", std::make_shared(), "Path to the base directory where all cache files (of a cache identified by `cache_name`) are stored."}, + {"cache_path", std::make_shared(), "Path to a particular cache file, corresponding to a file segment in a source file"}, + {"key", std::make_shared(), "Cache key of the file segment"}, + {"file_segment_range_begin", std::make_shared(), "Offset corresponding to the beginning of the file segment range"}, + {"file_segment_range_end", std::make_shared(), "Offset corresponding to the (inclusive) end of the file segment range"}, + {"size", std::make_shared(), "Size of the file segment"}, + {"state", std::make_shared(), "File segment state (DOWNLOADED, DOWNLOADING, PARTIALLY_DOWNLOADED, ...)"}, + {"cache_hits", std::make_shared(), "Number of cache hits of corresponding file segment"}, + {"references", std::make_shared(), "Number of references to corresponding file segment. Value 1 means that nobody uses it at the moment (the only existing reference is in cache storage itself)"}, + {"downloaded_size", std::make_shared(), "Downloaded size of the file segment"}, + {"kind", std::make_shared(), "File segment kind (used to distinguish between file segments added as a part of 'Temporary data in cache')"}, + {"unbound", std::make_shared>(), "Internal implementation flag"}, + {"user_id", std::make_shared(), "User ID of the user who created the file segment"}, + {"file_size", std::make_shared(std::make_shared()), "File size of the file to which the current file segment belongs"}, }; } diff --git a/src/Storages/System/StorageSystemGraphite.cpp b/src/Storages/System/StorageSystemGraphite.cpp index a638a08fac7..eaa386763c2 100644 --- a/src/Storages/System/StorageSystemGraphite.cpp +++ b/src/Storages/System/StorageSystemGraphite.cpp @@ -11,7 +11,11 @@ ColumnsDescription StorageSystemGraphite::getColumnsDescription() return ColumnsDescription { {"config_name", std::make_shared(), "graphite_rollup parameter name."}, - {"rule_type", std::make_shared(), ""}, + {"rule_type", std::make_shared(), + "The rule type.
Possible values: RuleTypeAll = 0 - default, with regex, compatible with old scheme; " + "RuleTypePlain = 1 - plain metrics, with regex, compatible with old scheme; " + "RuleTypeTagged = 2 - tagged metrics, with regex, compatible with old scheme; " + "RuleTypeTagList = 3 - tagged metrics, with regex (converted to RuleTypeTagged from string like 'retention=10min ; env=(staging|prod)')"}, {"regexp", std::make_shared(), "A pattern for the metric name."}, {"function", std::make_shared(), "The name of the aggregating function."}, {"age", std::make_shared(), "The minimum age of the data in seconds."}, diff --git a/src/Storages/System/StorageSystemJemalloc.cpp b/src/Storages/System/StorageSystemJemalloc.cpp index 15543208dd9..b55e32c479c 100644 --- a/src/Storages/System/StorageSystemJemalloc.cpp +++ b/src/Storages/System/StorageSystemJemalloc.cpp @@ -115,7 +115,7 @@ Pipe StorageSystemJemallocBins::read( { storage_snapshot->check(column_names); - auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtualsList()); MutableColumns res_columns = header.cloneEmptyColumns(); fillJemallocBins(res_columns); diff --git a/src/Storages/System/StorageSystemKafkaConsumers.cpp b/src/Storages/System/StorageSystemKafkaConsumers.cpp index 6c9b1681c8b..86713632339 100644 --- a/src/Storages/System/StorageSystemKafkaConsumers.cpp +++ b/src/Storages/System/StorageSystemKafkaConsumers.cpp @@ -41,8 +41,8 @@ ColumnsDescription StorageSystemKafkaConsumers::getColumnsDescription() {"last_rebalance_time", std::make_shared(), "Timestamp of the most recent Kafka rebalance."}, {"num_rebalance_revocations", std::make_shared(), "Number of times the consumer was revoked its partitions."}, {"num_rebalance_assignments", std::make_shared(), "Number of times the consumer was assigned to Kafka cluster."}, - {"is_currently_used", std::make_shared(), "Consumer is in use."}, - {"last_used", std::make_shared(6)}, + {"is_currently_used", std::make_shared(), "The flag which shows whether the consumer is in use."}, + {"last_used", std::make_shared(6), "The last time this consumer was in use."}, {"rdkafka_stat", std::make_shared(), "Library internal statistic. 
Set statistics_interval_ms to 0 to disable, default is 3000 (once in three seconds)."}, }; } diff --git a/src/Storages/System/StorageSystemMerges.cpp b/src/Storages/System/StorageSystemMerges.cpp index fac653e524e..0fca5dc84a2 100644 --- a/src/Storages/System/StorageSystemMerges.cpp +++ b/src/Storages/System/StorageSystemMerges.cpp @@ -16,12 +16,12 @@ ColumnsDescription StorageSystemMerges::getColumnsDescription() {"elapsed", std::make_shared(), "The time elapsed (in seconds) since the merge started."}, {"progress", std::make_shared(), "The percentage of completed work from 0 to 1."}, {"num_parts", std::make_shared(), "The number of parts to be merged."}, - {"source_part_names", std::make_shared(std::make_shared()), ""}, + {"source_part_names", std::make_shared(std::make_shared()), "The list of source part names."}, {"result_part_name", std::make_shared(), "The name of the part that will be formed as the result of merging."}, - {"source_part_paths", std::make_shared(std::make_shared()), ""}, - {"result_part_path", std::make_shared(), ""}, - {"partition_id", std::make_shared()}, - {"partition", std::make_shared()}, + {"source_part_paths", std::make_shared(std::make_shared()), "The list of paths for each source part."}, + {"result_part_path", std::make_shared(), "The path of the part that will be formed as the result of merging."}, + {"partition_id", std::make_shared(), "The identifier of the partition where the merge is happening."}, + {"partition", std::make_shared(), "The name of the partition."}, {"is_mutation", std::make_shared(), "1 if this process is a part mutation."}, {"total_size_bytes_compressed", std::make_shared(), "The total size of the compressed data in the merged chunks."}, {"total_size_bytes_uncompressed", std::make_shared(), "The total size of compressed data in the merged chunks."}, diff --git a/src/Storages/System/StorageSystemModels.cpp b/src/Storages/System/StorageSystemModels.cpp index e715238ddd4..8960d0625ba 100644 --- a/src/Storages/System/StorageSystemModels.cpp +++ b/src/Storages/System/StorageSystemModels.cpp @@ -13,13 +13,12 @@ namespace DB ColumnsDescription StorageSystemModels::getColumnsDescription() { - /// TODO: Fill in all the comments. return ColumnsDescription { - { "model_path", std::make_shared() }, - { "type", std::make_shared() }, - { "loading_start_time", std::make_shared() }, - { "loading_duration", std::make_shared() }, + { "model_path", std::make_shared(), "Path to the trained model."}, + { "type", std::make_shared(), "Model type. Currently only catboost is supported."}, + { "loading_start_time", std::make_shared(), "The time when the loading of the model started."}, + { "loading_duration", std::make_shared(), "How long it took to load the model."}, }; } diff --git a/src/Storages/System/StorageSystemMutations.cpp b/src/Storages/System/StorageSystemMutations.cpp index 7d263d9468d..60b80e0b0ad 100644 --- a/src/Storages/System/StorageSystemMutations.cpp +++ b/src/Storages/System/StorageSystemMutations.cpp @@ -38,7 +38,7 @@ ColumnsDescription StorageSystemMutations::getColumnsDescription() "1 if the mutation is completed, " "0 if the mutation is still in process.
" }, - { "is_killed", std::make_shared() }, + { "is_killed", std::make_shared(), "Only available in ClickHouse Cloud."}, { "latest_failed_part", std::make_shared(), "The name of the most recent part that could not be mutated."}, { "latest_fail_time", std::make_shared(), "The date and time of the most recent part mutation failure."}, { "latest_fail_reason", std::make_shared(), "The exception message that caused the most recent part mutation failure."}, diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index c5d8b307368..10898f79d10 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -19,6 +19,8 @@ StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool mult : IStorage(table_id), multithreaded(multithreaded_), limit(limit_), offset(offset_) { StorageInMemoryMetadata storage_metadata; + /// This column doesn't have a comment, because otherwise it will be added to all the tables which were created via + /// CREATE TABLE test as numbers(5) storage_metadata.setColumns(ColumnsDescription({{"number", std::make_shared()}})); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index 3091ffdb51a..936d55e61a0 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -15,6 +15,8 @@ StorageSystemOne::StorageSystemOne(const StorageID & table_id_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; + /// This column doesn't have a comment, because otherwise it will be added to all tables created via: + /// CREATE TABLE test (dummy UInt8) ENGINE = Distributed(`default`, `system.one`) storage_metadata.setColumns(ColumnsDescription({{"dummy", std::make_shared()}})); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp b/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp index 9dd2ba0b156..9cba92bca12 100644 --- a/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp +++ b/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp @@ -18,28 +18,27 @@ namespace DB ColumnsDescription StorageSystemPartMovesBetweenShards::getColumnsDescription() { - /// TODO: Fill in all the comments return ColumnsDescription { /// Table properties. - { "database", std::make_shared() }, - { "table", std::make_shared() }, + { "database", std::make_shared(), "The name of the database where move is performed."}, + { "table", std::make_shared(), "The name of the table where move is performed."}, /// Constant element properties. - { "task_name", std::make_shared() }, - { "task_uuid", std::make_shared() }, - { "create_time", std::make_shared() }, - { "part_name", std::make_shared() }, - { "part_uuid", std::make_shared() }, - { "to_shard", std::make_shared() }, - { "dst_part_name", std::make_shared() }, + { "task_name", std::make_shared(), "The name of the moving task."}, + { "task_uuid", std::make_shared(), "The identifier of the moving task."}, + { "create_time", std::make_shared(), "The time when the task was created."}, + { "part_name", std::make_shared(), "The name of the part which is in a process of moving."}, + { "part_uuid", std::make_shared(), "The UUID of the part which is in a process of moving."}, + { "to_shard", std::make_shared(), "The name of the destination shard."}, + { "dst_part_name", std::make_shared(), "The result part name."}, /// Processing status of item. 
- { "update_time", std::make_shared() }, - { "state", std::make_shared() }, - { "rollback", std::make_shared() }, - { "num_tries", std::make_shared() }, - { "last_exception", std::make_shared() }, + { "update_time", std::make_shared(), "The last time the task status was updated."}, + { "state", std::make_shared(), "The current state of the move."}, + { "rollback", std::make_shared(), "The flag which indicates whether the operation was rolled back."}, + { "num_tries", std::make_shared(), "The number of tries to complete the operation."}, + { "last_exception", std::make_shared(), "The last exception name, if any."}, }; } diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index b6e4ee4161e..1b800fd64a9 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -94,33 +94,33 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"move_ttl_info.min", std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the minimum key value for a TTL MOVE rule."}, {"move_ttl_info.max", std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the maximum key value for a TTL MOVE rule."}, - {"default_compression_codec", std::make_shared()}, + {"default_compression_codec", std::make_shared(), "The name of the codec used to compress this data part (in case when there is no explicit codec for columns)."}, - {"recompression_ttl_info.expression", std::make_shared(std::make_shared())}, - {"recompression_ttl_info.min", std::make_shared(std::make_shared())}, - {"recompression_ttl_info.max", std::make_shared(std::make_shared())}, + {"recompression_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"recompression_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, + {"recompression_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, - {"group_by_ttl_info.expression", std::make_shared(std::make_shared())}, - {"group_by_ttl_info.min", std::make_shared(std::make_shared())}, - {"group_by_ttl_info.max", std::make_shared(std::make_shared())}, + {"group_by_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"group_by_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, + {"group_by_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, - {"rows_where_ttl_info.expression", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.min", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.max", std::make_shared(std::make_shared())}, + {"rows_where_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"rows_where_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part.
Used to understand whether we have at least one row with expired TTL."}, + {"rows_where_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, - {"projections", std::make_shared(std::make_shared())}, + {"projections", std::make_shared(std::make_shared()), "The list of projection names calculated for this part."}, - {"visible", std::make_shared()}, - {"creation_tid", getTransactionIDDataType()}, - {"removal_tid_lock", std::make_shared()}, - {"removal_tid", getTransactionIDDataType()}, - {"creation_csn", std::make_shared()}, - {"removal_csn", std::make_shared()}, + {"visible", std::make_shared(), "Flag which indicates whether this part is visible for SELECT queries."}, + {"creation_tid", getTransactionIDDataType(), "ID of transaction that has created/is trying to create this object."}, + {"removal_tid_lock", std::make_shared(), "Hash of removal_tid, used to lock an object for removal."}, + {"removal_tid", getTransactionIDDataType(), "ID of transaction that has removed/is trying to remove this object"}, + {"creation_csn", std::make_shared(), "CSN of transaction that has created this object"}, + {"removal_csn", std::make_shared(), "CSN of transaction that has removed this object"}, - {"has_lightweight_delete", std::make_shared()}, + {"has_lightweight_delete", std::make_shared(), "The flag which indicates whether the part has a lightweight delete mask."}, - {"last_removal_attempt_time", std::make_shared()}, - {"removal_state", std::make_shared()}, + {"last_removal_attempt_time", std::make_shared(), "The last time the server tried to delete this part."}, + {"removal_state", std::make_shared(), "The current state of the part removal process."}, } ) { diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index f31dd5a955e..6bdfdd357e8 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -325,12 +325,10 @@ StorageSystemPartsBase::StorageSystemPartsBase(const StorageID & table_id_, Colu StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns); setInMemoryMetadata(storage_metadata); + + VirtualColumnsDescription virtuals; + virtuals.addEphemeral("_state", std::make_shared(), ""); + setVirtuals(std::move(virtuals)); } -NamesAndTypesList StorageSystemPartsBase::getVirtuals() const -{ - return NamesAndTypesList{ - NameAndTypePair("_state", std::make_shared()) - }; -} } diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index b5d4a74e58b..0527a99b841 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -133,8 +133,6 @@ public: size_t max_block_size, size_t num_streams) override; - NamesAndTypesList getVirtuals() const override; - bool isSystemStorage() const override { return true; } private: diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index 833a5e1ec16..f34b0e0cfda 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -22,59 +22,60 @@ namespace DB StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, ColumnsDescription{ - {"partition", std::make_shared()}, - {"name", std::make_shared()}, - {"uuid", std::make_shared()}, -
{"part_type", std::make_shared()}, - {"active", std::make_shared()}, - {"marks", std::make_shared()}, - {"rows", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"data_compressed_bytes", std::make_shared()}, - {"data_uncompressed_bytes", std::make_shared()}, - {"marks_bytes", std::make_shared()}, - {"modification_time", std::make_shared()}, - {"remove_time", std::make_shared()}, - {"refcount", std::make_shared()}, - {"min_date", std::make_shared()}, - {"max_date", std::make_shared()}, - {"min_time", std::make_shared()}, - {"max_time", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"min_block_number", std::make_shared()}, - {"max_block_number", std::make_shared()}, - {"level", std::make_shared()}, - {"data_version", std::make_shared()}, - {"primary_key_bytes_in_memory", std::make_shared()}, - {"primary_key_bytes_in_memory_allocated", std::make_shared()}, + {"partition", std::make_shared(), "The partition name."}, + {"name", std::make_shared(), "Name of the data part."}, + {"uuid", std::make_shared(), "The part's UUID."}, + {"part_type", std::make_shared(), "The data part storing format. " + "Possible values: Wide — Each column is stored in a separate file in a filesystem, Compact — All columns are stored in one file in a filesystem."}, + {"active", std::make_shared(), "Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging."}, + {"marks", std::make_shared(), "The number of marks. To get the approximate number of rows in a data part, multiply marks by the index granularity (usually 8192) (this hint does not work for adaptive granularity)."}, + {"rows", std::make_shared(), "The number of rows."}, + {"bytes_on_disk", std::make_shared(), "Total size of all the data part files in bytes."}, + {"data_compressed_bytes", std::make_shared(), "Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"marks_bytes", std::make_shared(), "The size of the file with marks."}, + {"modification_time", std::make_shared(), "The time the directory with the data part was modified. This usually corresponds to the time of data part creation."}, + {"remove_time", std::make_shared(), "The time when the data part became inactive."}, + {"refcount", std::make_shared(), "The number of places where the data part is used.
A value greater than 2 indicates that the data part is used in queries or merges."}, + {"min_date", std::make_shared(), "The minimum value for the Date column if that is included in the partition key."}, + {"max_date", std::make_shared(), "The maximum value for the Date column if that is included in the partition key."}, + {"min_time", std::make_shared(), "The minimum value for the DateTime column if that is included in the partition key."}, + {"max_time", std::make_shared(), "The maximum value for the DateTime column if that is included in the partition key."}, + {"partition_id", std::make_shared(), "ID of the partition."}, + {"min_block_number", std::make_shared(), "The minimum number of data parts that make up the current part after merging."}, + {"max_block_number", std::make_shared(), "The maximum number of data parts that make up the current part after merging."}, + {"level", std::make_shared(), "Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts."}, + {"data_version", std::make_shared(), "Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than data_version)."}, + {"primary_key_bytes_in_memory", std::make_shared(), "The amount of memory (in bytes) used by primary key values."}, + {"primary_key_bytes_in_memory_allocated", std::make_shared(), "The amount of memory (in bytes) reserved for primary key values."}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"engine", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table."}, + {"engine", std::make_shared(), "Name of the table engine without parameters."}, + {"disk_name", std::make_shared(), "Name of a disk that stores the data part."}, + {"path", std::make_shared(), "Absolute path to the folder with data part files."}, - {"column", std::make_shared()}, - {"type", std::make_shared()}, - {"column_position", std::make_shared()}, - {"default_kind", std::make_shared()}, - {"default_expression", std::make_shared()}, - {"column_bytes_on_disk", std::make_shared()}, - {"column_data_compressed_bytes", std::make_shared()}, - {"column_data_uncompressed_bytes", std::make_shared()}, - {"column_marks_bytes", std::make_shared()}, - {"column_modification_time", std::make_shared(std::make_shared())}, + {"column", std::make_shared(), "Name of the column."}, + {"type", std::make_shared(), "Column type."}, + {"column_position", std::make_shared(), "Ordinal position of a column in a table starting with 1."}, + {"default_kind", std::make_shared(), "Expression type (DEFAULT, MATERIALIZED, ALIAS) for the default value, or an empty string if it is not defined."}, + {"default_expression", std::make_shared(), "Expression for the default value, or an empty string if it is not defined."}, + {"column_bytes_on_disk", std::make_shared(), "Total size of the column in bytes."}, + {"column_data_compressed_bytes", std::make_shared(), "Total size of the compressed data in the column, in bytes."}, + {"column_data_uncompressed_bytes", std::make_shared(), "Total size of the decompressed data in the column, in bytes."}, + {"column_marks_bytes", std::make_shared(), "The size of the marks for column, in bytes."}, + {"column_modification_time", std::make_shared(std::make_shared()), "The last time the column was modified."}, - {"serialization_kind", std::make_shared()}, - 
{"substreams", std::make_shared(std::make_shared())}, - {"filenames", std::make_shared(std::make_shared())}, - {"subcolumns.names", std::make_shared(std::make_shared())}, - {"subcolumns.types", std::make_shared(std::make_shared())}, - {"subcolumns.serializations", std::make_shared(std::make_shared())}, - {"subcolumns.bytes_on_disk", std::make_shared(std::make_shared())}, - {"subcolumns.data_compressed_bytes", std::make_shared(std::make_shared())}, - {"subcolumns.data_uncompressed_bytes", std::make_shared(std::make_shared())}, - {"subcolumns.marks_bytes", std::make_shared(std::make_shared())}, + {"serialization_kind", std::make_shared(), "Kind of serialization of a column"}, + {"substreams", std::make_shared(std::make_shared()), "Names of substreams to which column is serialized"}, + {"filenames", std::make_shared(std::make_shared()), "Names of files for each substream of a column respectively"}, + {"subcolumns.names", std::make_shared(std::make_shared()), "Names of subcolumns of a column"}, + {"subcolumns.types", std::make_shared(std::make_shared()), "Types of subcolumns of a column"}, + {"subcolumns.serializations", std::make_shared(std::make_shared()), "Kinds of serialization of subcolumns of a column"}, + {"subcolumns.bytes_on_disk", std::make_shared(std::make_shared()), "Sizes in bytes for each subcolumn"}, + {"subcolumns.data_compressed_bytes", std::make_shared(std::make_shared()), "Sizes of the compressed data for each subcolumn, in bytes"}, + {"subcolumns.data_uncompressed_bytes", std::make_shared(std::make_shared()), "Sizes of the decompressed data for each subcolumn, in bytes"}, + {"subcolumns.marks_bytes", std::make_shared(std::make_shared()), "Sizes of the marks for each subcolumn of a column, in bytes"}, } ) { diff --git a/src/Storages/System/StorageSystemPrivileges.cpp b/src/Storages/System/StorageSystemPrivileges.cpp index 4d749ab0815..58dcf62115e 100644 --- a/src/Storages/System/StorageSystemPrivileges.cpp +++ b/src/Storages/System/StorageSystemPrivileges.cpp @@ -68,13 +68,21 @@ const std::vector> & StorageSystemPrivileges::getAccess ColumnsDescription StorageSystemPrivileges::getColumnsDescription() { - /// TODO: Fill in all the comments. - return ColumnsDescription - { - {"privilege", std::make_shared(getAccessTypeEnumValues())}, - {"aliases", std::make_shared(std::make_shared())}, - {"level", std::make_shared(std::make_shared(getLevelEnumValues()))}, - {"parent_group", std::make_shared(std::make_shared(getAccessTypeEnumValues()))}, + return ColumnsDescription{ + {"privilege", + std::make_shared(getAccessTypeEnumValues()), + "Name of a privilege which can be used in the GRANT command."}, + {"aliases", + std::make_shared(std::make_shared()), + "List of aliases which can be used instead of the name of the privilege."}, + {"level", + std::make_shared(std::make_shared(getLevelEnumValues())), + "Level of the privilege. GLOBAL privileges can be granted only globally (ON *.*), DATABASE privileges can be granted " + "on a specific database (ON .*) or globally (ON *.*), TABLE privileges can be granted either on a specific table or " + "on a specific database or globally, and COLUMN privileges can be granted like TABLE privileges but also allow to specify columns."}, + {"parent_group", std::make_shared(std::make_shared(getAccessTypeEnumValues())), + "Parent privilege - if the parent privilege is granted then all its children privileges are considered as granted too." 
+ }, }; } diff --git a/src/Storages/System/StorageSystemProcesses.cpp b/src/Storages/System/StorageSystemProcesses.cpp index b6fd1aabd45..bef98e59687 100644 --- a/src/Storages/System/StorageSystemProcesses.cpp +++ b/src/Storages/System/StorageSystemProcesses.cpp @@ -21,35 +21,35 @@ ColumnsDescription StorageSystemProcesses::getColumnsDescription() { auto description = ColumnsDescription { - {"is_initial_query", std::make_shared()}, + {"is_initial_query", std::make_shared(), "Whether this query comes directly from the user or was issued by the ClickHouse server as part of distributed query execution."}, {"user", std::make_shared(), "The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the default user. The field contains the username for a specific query, not for a query that this query initiated."}, {"query_id", std::make_shared(), "Query ID, if defined."}, - {"address", DataTypeFactory::instance().get("IPv6"), "The IP address the request was made from. The same for distributed processing. To track where a distributed query was originally made from, look at system.processes on the query requestor server."}, - {"port", std::make_shared()}, + {"address", DataTypeFactory::instance().get("IPv6"), "The IP address the query was made from. The same for distributed processing. To track where a distributed query was originally made from, look at system.processes on the query requestor server."}, + {"port", std::make_shared(), "The client port the query was made from."}, - {"initial_user", std::make_shared()}, - {"initial_query_id", std::make_shared()}, - {"initial_address", DataTypeFactory::instance().get("IPv6")}, - {"initial_port", std::make_shared()}, + {"initial_user", std::make_shared(), "Name of the user who ran the initial query (for distributed query execution)."}, + {"initial_query_id", std::make_shared(), "ID of the initial query (for distributed query execution)."}, + {"initial_address", DataTypeFactory::instance().get("IPv6"), "IP address that the parent query was launched from."}, + {"initial_port", std::make_shared(), "The client port that was used to make the parent query."}, - {"interface", std::make_shared()}, + {"interface", std::make_shared(), "The interface which was used to send the query.
TCP = 1, HTTP = 2, GRPC = 3, MYSQL = 4, POSTGRESQL = 5, LOCAL = 6, TCP_INTERSERVER = 7."}, - {"os_user", std::make_shared()}, - {"client_hostname", std::make_shared()}, - {"client_name", std::make_shared()}, - {"client_revision", std::make_shared()}, - {"client_version_major", std::make_shared()}, - {"client_version_minor", std::make_shared()}, - {"client_version_patch", std::make_shared()}, + {"os_user", std::make_shared(), "Operating system username who runs clickhouse-client."}, + {"client_hostname", std::make_shared(), "Hostname of the client machine where the clickhouse-client or another TCP client is run."}, + {"client_name", std::make_shared(), "The clickhouse-client or another TCP client name."}, + {"client_revision", std::make_shared(), "Revision of the clickhouse-client or another TCP client."}, + {"client_version_major", std::make_shared(), "Major version of the clickhouse-client or another TCP client."}, + {"client_version_minor", std::make_shared(), "Minor version of the clickhouse-client or another TCP client."}, + {"client_version_patch", std::make_shared(), "Patch component of the clickhouse-client or another TCP client version."}, - {"http_method", std::make_shared()}, - {"http_user_agent", std::make_shared()}, - {"http_referer", std::make_shared()}, - {"forwarded_for", std::make_shared()}, + {"http_method", std::make_shared(), "HTTP method that initiated the query. Possible values: 0 — The query was launched from the TCP interface. 1 — GET method was used. 2 — POST method was used."}, + {"http_user_agent", std::make_shared(), "HTTP header UserAgent passed in the HTTP query."}, + {"http_referer", std::make_shared(), "HTTP header Referer passed in the HTTP query (contains an absolute or partial address of the page making the query)."}, + {"forwarded_for", std::make_shared(), "HTTP header X-Forwarded-For passed in the HTTP query."}, - {"quota_key", std::make_shared()}, - {"distributed_depth", std::make_shared()}, + {"quota_key", std::make_shared(), "The quota key specified in the quotas setting (see keyed)."}, + {"distributed_depth", std::make_shared(), "The number of times query was retransmitted between server nodes internally."}, {"elapsed", std::make_shared(), "The time in seconds since request execution started."}, {"is_cancelled", std::make_shared(), "Was query cancelled."}, @@ -57,18 +57,18 @@ ColumnsDescription StorageSystemProcesses::getColumnsDescription() {"read_rows", std::make_shared(), "The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers."}, {"read_bytes", std::make_shared(), "The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers."}, {"total_rows_approx", std::make_shared(), "The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known."}, - {"written_rows", std::make_shared()}, - {"written_bytes", std::make_shared()}, + {"written_rows", std::make_shared(), "The amount of rows written to the storage."}, + {"written_bytes", std::make_shared(), "The amount of bytes written to the storage."}, {"memory_usage", std::make_shared(), "Amount of RAM the query uses. 
It might not include some types of dedicated memory"}, - {"peak_memory_usage", std::make_shared()}, + {"peak_memory_usage", std::make_shared(), "The current peak of memory usage."}, {"query", std::make_shared(), "The query text. For INSERT, it does not include the data to insert."}, - {"query_kind", std::make_shared()}, + {"query_kind", std::make_shared(), "The type of the query - SELECT, INSERT, etc."}, - {"thread_ids", std::make_shared(std::make_shared())}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, - {"Settings", std::make_shared(std::make_shared(), std::make_shared())}, + {"thread_ids", std::make_shared(std::make_shared()), "The list of identifiers of all threads which executed this query."}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "ProfileEvents calculated for this query."}, + {"Settings", std::make_shared(std::make_shared(), std::make_shared()), "The list of modified user-level settings."}, - {"current_database", std::make_shared()}, + {"current_database", std::make_shared(), "The name of the current database."}, }; description.setAliases({ diff --git a/src/Storages/System/StorageSystemProjectionParts.cpp b/src/Storages/System/StorageSystemProjectionParts.cpp index 016705f4e66..4bdcea67313 100644 --- a/src/Storages/System/StorageSystemProjectionParts.cpp +++ b/src/Storages/System/StorageSystemProjectionParts.cpp @@ -19,71 +19,71 @@ namespace DB StorageSystemProjectionParts::StorageSystemProjectionParts(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, ColumnsDescription{ - {"partition", std::make_shared()}, - {"name", std::make_shared()}, - {"part_type", std::make_shared()}, - {"parent_name", std::make_shared()}, - {"parent_uuid", std::make_shared()}, - {"parent_part_type", std::make_shared()}, - {"active", std::make_shared()}, - {"marks", std::make_shared()}, - {"rows", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"data_compressed_bytes", std::make_shared()}, - {"data_uncompressed_bytes", std::make_shared()}, - {"marks_bytes", std::make_shared()}, - {"parent_marks", std::make_shared()}, - {"parent_rows", std::make_shared()}, - {"parent_bytes_on_disk", std::make_shared()}, - {"parent_data_compressed_bytes", std::make_shared()}, - {"parent_data_uncompressed_bytes", std::make_shared()}, - {"parent_marks_bytes", std::make_shared()}, - {"modification_time", std::make_shared()}, - {"remove_time", std::make_shared()}, - {"refcount", std::make_shared()}, - {"min_date", std::make_shared()}, - {"max_date", std::make_shared()}, - {"min_time", std::make_shared()}, - {"max_time", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"min_block_number", std::make_shared()}, - {"max_block_number", std::make_shared()}, - {"level", std::make_shared()}, - {"data_version", std::make_shared()}, - {"primary_key_bytes_in_memory", std::make_shared()}, - {"primary_key_bytes_in_memory_allocated", std::make_shared()}, - {"is_frozen", std::make_shared()}, + {"partition", std::make_shared(), "The partition name."}, + {"name", std::make_shared(), "Name of the data part."}, + {"part_type", std::make_shared(), "The data part storing format.
diff --git a/src/Storages/System/StorageSystemProjectionParts.cpp b/src/Storages/System/StorageSystemProjectionParts.cpp index 016705f4e66..4bdcea67313 100644 --- a/src/Storages/System/StorageSystemProjectionParts.cpp +++ b/src/Storages/System/StorageSystemProjectionParts.cpp @@ -19,71 +19,71 @@ namespace DB StorageSystemProjectionParts::StorageSystemProjectionParts(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, ColumnsDescription{ - {"partition", std::make_shared()}, - {"name", std::make_shared()}, - {"part_type", std::make_shared()}, - {"parent_name", std::make_shared()}, - {"parent_uuid", std::make_shared()}, - {"parent_part_type", std::make_shared()}, - {"active", std::make_shared()}, - {"marks", std::make_shared()}, - {"rows", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"data_compressed_bytes", std::make_shared()}, - {"data_uncompressed_bytes", std::make_shared()}, - {"marks_bytes", std::make_shared()}, - {"parent_marks", std::make_shared()}, - {"parent_rows", std::make_shared()}, - {"parent_bytes_on_disk", std::make_shared()}, - {"parent_data_compressed_bytes", std::make_shared()}, - {"parent_data_uncompressed_bytes", std::make_shared()}, - {"parent_marks_bytes", std::make_shared()}, - {"modification_time", std::make_shared()}, - {"remove_time", std::make_shared()}, - {"refcount", std::make_shared()}, - {"min_date", std::make_shared()}, - {"max_date", std::make_shared()}, - {"min_time", std::make_shared()}, - {"max_time", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"min_block_number", std::make_shared()}, - {"max_block_number", std::make_shared()}, - {"level", std::make_shared()}, - {"data_version", std::make_shared()}, - {"primary_key_bytes_in_memory", std::make_shared()}, - {"primary_key_bytes_in_memory_allocated", std::make_shared()}, - {"is_frozen", std::make_shared()}, + {"partition", std::make_shared(), "The partition name."}, + {"name", std::make_shared(), "Name of the data part."}, + {"part_type", std::make_shared(), "The data part storing format. Possible values: Wide (a file per column) and Compact (a single file for all columns)."}, + {"parent_name", std::make_shared(), "The name of the source (parent) data part."}, + {"parent_uuid", std::make_shared(), "The UUID of the source (parent) data part."}, + {"parent_part_type", std::make_shared(), "The source (parent) data part storing format."}, + {"active", std::make_shared(), "Flag that indicates whether the data part is active. If a data part is active, it's used in a table. Otherwise, it's about to be deleted. Inactive data parts appear after merging and mutating operations."}, + {"marks", std::make_shared(), "The number of marks. To get the approximate number of rows in a data part, multiply marks by the index granularity (usually 8192; this hint does not work for adaptive granularity)."}, + {"rows", std::make_shared(), "The number of rows."}, + {"bytes_on_disk", std::make_shared(), "Total size of all the data part files in bytes."}, + {"data_compressed_bytes", std::make_shared(), "Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"marks_bytes", std::make_shared(), "The size of the file with marks."}, + {"parent_marks", std::make_shared(), "The number of marks in the source (parent) part."}, + {"parent_rows", std::make_shared(), "The number of rows in the source (parent) part."}, + {"parent_bytes_on_disk", std::make_shared(), "Total size of all the source (parent) data part files in bytes."}, + {"parent_data_compressed_bytes", std::make_shared(), "Total size of compressed data in the source (parent) data part."}, + {"parent_data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the source (parent) data part."}, + {"parent_marks_bytes", std::make_shared(), "The size of the file with marks in the source (parent) data part."}, + {"modification_time", std::make_shared(), "The time the directory with the data part was modified. This usually corresponds to the time of data part creation."}, + {"remove_time", std::make_shared(), "The time when the data part became inactive."}, + {"refcount", std::make_shared(), "The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges."}, + {"min_date", std::make_shared(), "The minimum value of the date key in the data part."}, + {"max_date", std::make_shared(), "The maximum value of the date key in the data part."}, + {"min_time", std::make_shared(), "The minimum value of the date and time key in the data part."}, + {"max_time", std::make_shared(), "The maximum value of the date and time key in the data part."}, + {"partition_id", std::make_shared(), "ID of the partition."}, + {"min_block_number", std::make_shared(), "The minimum number of data parts that make up the current part after merging."}, + {"max_block_number", std::make_shared(), "The maximum number of data parts that make up the current part after merging."}, + {"level", std::make_shared(), "Depth of the merge tree.
Zero means that the current part was created by insert rather than by merging other parts."}, + {"data_version", std::make_shared(), "Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than data_version)."}, + {"primary_key_bytes_in_memory", std::make_shared(), "The amount of memory (in bytes) used by primary key values."}, + {"primary_key_bytes_in_memory_allocated", std::make_shared(), "The amount of memory (in bytes) reserved for primary key values."}, + {"is_frozen", std::make_shared(), "Flag that shows that a partition data backup exists: 1 if the backup exists, 0 if it does not."}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"engine", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table."}, + {"engine", std::make_shared(), "Name of the table engine without parameters."}, + {"disk_name", std::make_shared(), "Name of a disk that stores the data part."}, + {"path", std::make_shared(), "Absolute path to the folder with data part files."}, - {"hash_of_all_files", std::make_shared()}, - {"hash_of_uncompressed_files", std::make_shared()}, - {"uncompressed_hash_of_compressed_files", std::make_shared()}, + {"hash_of_all_files", std::make_shared(), "sipHash128 of compressed files."}, + {"hash_of_uncompressed_files", std::make_shared(), "sipHash128 of uncompressed files (files with marks, index file etc.)."}, + {"uncompressed_hash_of_compressed_files", std::make_shared(), "sipHash128 of data in the compressed files as if they were uncompressed."}, - {"delete_ttl_info_min", std::make_shared()}, - {"delete_ttl_info_max", std::make_shared()}, + {"delete_ttl_info_min", std::make_shared(), "The minimum value of the date and time key for the TTL DELETE rule."}, + {"delete_ttl_info_max", std::make_shared(), "The maximum value of the date and time key for the TTL DELETE rule."}, - {"move_ttl_info.expression", std::make_shared(std::make_shared())}, - {"move_ttl_info.min", std::make_shared(std::make_shared())}, - {"move_ttl_info.max", std::make_shared(std::make_shared())}, + {"move_ttl_info.expression", std::make_shared(std::make_shared()), "Array of expressions. Each expression defines a TTL MOVE rule."}, + {"move_ttl_info.min", std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the minimum key value for a TTL MOVE rule."}, + {"move_ttl_info.max", std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the maximum key value for a TTL MOVE rule."}, - {"default_compression_codec", std::make_shared()}, + {"default_compression_codec", std::make_shared(), "The name of the codec used to compress this data part (used when there is no explicit codec for columns)."}, - {"recompression_ttl_info.expression", std::make_shared(std::make_shared())}, - {"recompression_ttl_info.min", std::make_shared(std::make_shared())}, - {"recompression_ttl_info.max", std::make_shared(std::make_shared())}, + {"recompression_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"recompression_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part.
Used to understand whether we have at least one row with expired TTL."}, + {"recompression_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, - {"group_by_ttl_info.expression", std::make_shared(std::make_shared())}, - {"group_by_ttl_info.min", std::make_shared(std::make_shared())}, - {"group_by_ttl_info.max", std::make_shared(std::make_shared())}, + {"group_by_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"group_by_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, + {"group_by_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, - {"rows_where_ttl_info.expression", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.min", std::make_shared(std::make_shared())}, - {"rows_where_ttl_info.max", std::make_shared(std::make_shared())} + {"rows_where_ttl_info.expression", std::make_shared(std::make_shared()), "The TTL expression."}, + {"rows_where_ttl_info.min", std::make_shared(std::make_shared()), "The minimum value of the calculated TTL expression within this part. Used to understand whether we have at least one row with expired TTL."}, + {"rows_where_ttl_info.max", std::make_shared(std::make_shared()), "The maximum value of the calculated TTL expression within this part. Used to understand whether we have all rows with expired TTL."}, } ) { diff --git a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp index 2ff25f86366..9521605688d 100644 --- a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp +++ b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp @@ -19,56 +19,56 @@ namespace DB StorageSystemProjectionPartsColumns::StorageSystemProjectionPartsColumns(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, ColumnsDescription{ - {"partition", std::make_shared()}, - {"name", std::make_shared()}, - {"part_type", std::make_shared()}, - {"parent_name", std::make_shared()}, - {"parent_uuid", std::make_shared()}, - {"parent_part_type", std::make_shared()}, - {"active", std::make_shared()}, - {"marks", std::make_shared()}, - {"rows", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"data_compressed_bytes", std::make_shared()}, - {"data_uncompressed_bytes", std::make_shared()}, - {"marks_bytes", std::make_shared()}, - {"parent_marks", std::make_shared()}, - {"parent_rows", std::make_shared()}, - {"parent_bytes_on_disk", std::make_shared()}, - {"parent_data_compressed_bytes", std::make_shared()}, - {"parent_data_uncompressed_bytes", std::make_shared()}, - {"parent_marks_bytes", std::make_shared()}, - {"modification_time", std::make_shared()}, - {"remove_time", std::make_shared()}, - {"refcount", std::make_shared()}, - {"min_date", std::make_shared()}, - {"max_date", std::make_shared()}, - {"min_time", std::make_shared()}, - {"max_time", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"min_block_number", std::make_shared()}, - {"max_block_number", std::make_shared()}, - {"level", std::make_shared()}, - {"data_version", std::make_shared()}, - {"primary_key_bytes_in_memory", std::make_shared()}, - 
{"primary_key_bytes_in_memory_allocated", std::make_shared()}, + {"partition", std::make_shared(), "The partition name. "}, + {"name", std::make_shared(), "Name of the data part."}, + {"part_type", std::make_shared(), "The data part storing format."}, + {"parent_name", std::make_shared(), "The name of the source (parent) data part."}, + {"parent_uuid", std::make_shared(), "The UUID of the source (parent) data part."}, + {"parent_part_type", std::make_shared(), "The source (parent) data part storing format."}, + {"active", std::make_shared(), "Flag that indicates whether the data part is active"}, + {"marks", std::make_shared(), "The number of marks."}, + {"rows", std::make_shared(), "The number of rows."}, + {"bytes_on_disk", std::make_shared(), "Total size of all the data part files in bytes."}, + {"data_compressed_bytes", std::make_shared(), "Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"marks_bytes", std::make_shared(), "The size of the file with marks."}, + {"parent_marks", std::make_shared(), "The number of marks in the source (parent) part."}, + {"parent_rows", std::make_shared(), "The number of rows in the source (parent) part."}, + {"parent_bytes_on_disk", std::make_shared(), "Total size of all the source (parent) data part files in bytes."}, + {"parent_data_compressed_bytes", std::make_shared(), "Total size of compressed data in the source (parent) data part."}, + {"parent_data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the source (parent) data part."}, + {"parent_marks_bytes", std::make_shared(), "The size of the file with marks in the source (parent) data part."}, + {"modification_time", std::make_shared(), "The time the directory with the data part was modified. This usually corresponds to the time of data part creation."}, + {"remove_time", std::make_shared(), "The time when the data part became inactive."}, + {"refcount", std::make_shared(), "The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges."}, + {"min_date", std::make_shared(), "The minimum value for the Date column if that is included in the partition key."}, + {"max_date", std::make_shared(), "The maximum value for the Date column if that is included in the partition key."}, + {"min_time", std::make_shared(), "The minimum value for the DateTime column if that is included in the partition key."}, + {"max_time", std::make_shared(), "The maximum value for the DateTime column if that is included in the partition key."}, + {"partition_id", std::make_shared(), "ID of the partition."}, + {"min_block_number", std::make_shared(), "The minimum number of data parts that make up the current part after merging."}, + {"max_block_number", std::make_shared(), "The maximum number of data parts that make up the current part after merging."}, + {"level", std::make_shared(), "Depth of the merge tree. 
Zero means that the current part was created by insert rather than by merging other parts."}, + {"data_version", std::make_shared(), "Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than data_version)."}, + {"primary_key_bytes_in_memory", std::make_shared(), "The amount of memory (in bytes) used by primary key values."}, + {"primary_key_bytes_in_memory_allocated", std::make_shared(), "The amount of memory (in bytes) reserved for primary key values."}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"engine", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table."}, + {"engine", std::make_shared(), "Name of the table engine without parameters."}, + {"disk_name", std::make_shared(), "Name of a disk that stores the data part."}, + {"path", std::make_shared(), "Absolute path to the folder with data part files."}, - {"column", std::make_shared()}, - {"type", std::make_shared()}, - {"column_position", std::make_shared()}, - {"default_kind", std::make_shared()}, - {"default_expression", std::make_shared()}, - {"column_bytes_on_disk", std::make_shared()}, - {"column_data_compressed_bytes", std::make_shared()}, - {"column_data_uncompressed_bytes", std::make_shared()}, - {"column_marks_bytes", std::make_shared()}, - {"column_modification_time", std::make_shared(std::make_shared())}, + {"column", std::make_shared(), "Name of the column."}, + {"type", std::make_shared(), "Column type."}, + {"column_position", std::make_shared(), "Ordinal position of a column in a table starting with 1."}, + {"default_kind", std::make_shared(), "Expression type (DEFAULT, MATERIALIZED, ALIAS) for the default value, or an empty string if it is not defined."}, + {"default_expression", std::make_shared(), "Expression for the default value, or an empty string if it is not defined."}, + {"column_bytes_on_disk", std::make_shared(), "Total size of the column in bytes."}, + {"column_data_compressed_bytes", std::make_shared(), "Total size of compressed data in the column, in bytes."}, + {"column_data_uncompressed_bytes", std::make_shared(), "Total size of the decompressed data in the column, in bytes."}, + {"column_marks_bytes", std::make_shared(), "The size of the column with marks, in bytes."}, + {"column_modification_time", std::make_shared(std::make_shared()), "The last time the column was modified."}, } ) { diff --git a/src/Storages/System/StorageSystemQuotaLimits.cpp b/src/Storages/System/StorageSystemQuotaLimits.cpp index dba449d4f1d..65acfba0c1b 100644 --- a/src/Storages/System/StorageSystemQuotaLimits.cpp +++ b/src/Storages/System/StorageSystemQuotaLimits.cpp @@ -68,7 +68,8 @@ ColumnsDescription StorageSystemQuotaLimits::getColumnsDescription() data_type = std::make_shared(); else data_type = std::make_shared(); - result.add({column_name, std::make_shared(data_type)}); + + result.add({column_name, std::make_shared(data_type), type_info.max_allowed_usage_description}); } return result; diff --git a/src/Storages/System/StorageSystemQuotaUsage.cpp b/src/Storages/System/StorageSystemQuotaUsage.cpp index 2df36aee240..da9c174b0d3 100644 --- a/src/Storages/System/StorageSystemQuotaUsage.cpp +++ b/src/Storages/System/StorageSystemQuotaUsage.cpp @@ -81,8 +81,8 @@ ColumnsDescription StorageSystemQuotaUsage::getColumnsDescriptionImpl(bool add_c data_type = std::make_shared(); else data_type = 
std::make_shared(); - description.add({column_name, std::make_shared(data_type)}); - description.add({String("max_") + column_name, std::make_shared(data_type)}); + description.add({column_name, std::make_shared(data_type), type_info.current_usage_description}); + description.add({String("max_") + column_name, std::make_shared(data_type), type_info.max_allowed_usage_description}); } return description; diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index 55002d6d00f..7cc1951fc05 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -20,14 +20,14 @@ StorageSystemRemoteDataPaths::StorageSystemRemoteDataPaths(const StorageID & tab StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, - {"cache_base_path", std::make_shared()}, - {"local_path", std::make_shared()}, - {"remote_path", std::make_shared()}, - {"size", std::make_shared()}, - {"common_prefix_for_blobs", std::make_shared()}, - {"cache_paths", std::make_shared(std::make_shared())}, + {"disk_name", std::make_shared(), "Disk name."}, + {"path", std::make_shared(), "Disk path."}, + {"cache_base_path", std::make_shared(), "Base directory of cache files."}, + {"local_path", std::make_shared(), "Path of the ClickHouse file, also used as the metadata path."}, + {"remote_path", std::make_shared(), "Blob path in object storage with which the ClickHouse file is associated."}, + {"size", std::make_shared(), "Size of the file (compressed)."}, + {"common_prefix_for_blobs", std::make_shared(), "Common prefix for blobs in object storage."}, + {"cache_paths", std::make_shared(std::make_shared()), "Cache files for the corresponding blob."}, })); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 26dead8cb01..af8d67cbc21 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -196,41 +196,47 @@ StorageSystemReplicas::StorageSystemReplicas(const StorageID & table_id_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription({ - { "database", std::make_shared() }, - { "table", std::make_shared() }, - { "engine", std::make_shared() }, - { "is_leader", std::make_shared() }, - { "can_become_leader", std::make_shared() }, - { "is_readonly", std::make_shared() }, - { "is_session_expired", std::make_shared() }, - { "future_parts", std::make_shared() }, - { "parts_to_check", std::make_shared() }, - { "zookeeper_name", std::make_shared() }, - { "zookeeper_path", std::make_shared() }, - { "replica_name", std::make_shared() }, - { "replica_path", std::make_shared() }, - { "columns_version", std::make_shared() }, - { "queue_size", std::make_shared() }, - { "inserts_in_queue", std::make_shared() }, - { "merges_in_queue", std::make_shared() }, - { "part_mutations_in_queue", std::make_shared() }, - { "queue_oldest_time", std::make_shared() }, - { "inserts_oldest_time", std::make_shared() }, - { "merges_oldest_time", std::make_shared() }, - { "part_mutations_oldest_time", std::make_shared() }, - { "oldest_part_to_get", std::make_shared() }, - { "oldest_part_to_merge_to", std::make_shared() }, - { "oldest_part_to_mutate_to", std::make_shared() }, - { "log_max_index", std::make_shared() }, - { "log_pointer", std::make_shared() }, - {
"last_queue_update", std::make_shared() }, - { "absolute_delay", std::make_shared() }, - { "total_replicas", std::make_shared() }, - { "active_replicas", std::make_shared() }, - { "lost_part_count", std::make_shared() }, - { "last_queue_update_exception", std::make_shared() }, - { "zookeeper_exception", std::make_shared() }, - { "replica_is_active", std::make_shared(std::make_shared(), std::make_shared()) } + { "database", std::make_shared(), "Database name."}, + { "table", std::make_shared(), "Table name."}, + { "engine", std::make_shared(), "Table engine name."}, + { "is_leader", std::make_shared(), "Whether the replica is the leader. Multiple replicas can be leaders at the same time. " + "A replica can be prevented from becoming a leader using the merge_tree setting replicated_can_become_leader. " + "The leaders are responsible for scheduling background merges. " + "Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader."}, + { "can_become_leader", std::make_shared(), "Whether the replica can be a leader."}, + { "is_readonly", std::make_shared(), "Whether the replica is in read-only mode. This mode is turned on if the config does not have sections with ClickHouse Keeper, " + "if an unknown error occurred when reinitializing sessions in ClickHouse Keeper, and during session reinitialization in ClickHouse Keeper."}, + { "is_session_expired", std::make_shared(), "Whether the session with ClickHouse Keeper has expired. Basically the same as `is_readonly`."}, + { "future_parts", std::make_shared(), "The number of data parts that will appear as the result of INSERTs or merges that haven't been done yet."}, + { "parts_to_check", std::make_shared(), "The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged."}, + { "zookeeper_name", std::make_shared(), "The name of the the [Zoo]Keeper cluster (possibly auxiliary one) where the table's metadata is stored"}, + { "zookeeper_path", std::make_shared(), "Path to table data in ClickHouse Keeper."}, + { "replica_name", std::make_shared(), "Replica name in ClickHouse Keeper. Different replicas of the same table have different names."}, + { "replica_path", std::make_shared(), "Path to replica data in ClickHouse Keeper. The same as concatenating 'zookeeper_path/replicas/replica_path'."}, + { "columns_version", std::make_shared(), "Version number of the table structure. Indicates how many times ALTER was performed. " + "If replicas have different versions, it means some replicas haven't made all of the ALTERs yet."}, + { "queue_size", std::make_shared(), "Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. It usually coincides with future_parts."}, + { "inserts_in_queue", std::make_shared(), "Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong."}, + { "merges_in_queue", std::make_shared(), "The number of merges waiting to be made. 
Sometimes merges are lengthy, so this value may be greater than zero for a long time."}, + { "part_mutations_in_queue", std::make_shared(), "The number of mutations waiting to be made."}, + { "queue_oldest_time", std::make_shared(), "If `queue_size` is greater than 0, shows when the oldest operation was added to the queue."}, + { "inserts_oldest_time", std::make_shared(), "See `queue_oldest_time`."}, + { "merges_oldest_time", std::make_shared(), "See `queue_oldest_time`."}, + { "part_mutations_oldest_time", std::make_shared(), "See `queue_oldest_time`."}, + { "oldest_part_to_get", std::make_shared(), "The name of the part to fetch from other replicas obtained from the oldest GET_PARTS entry in the replication queue."}, + { "oldest_part_to_merge_to", std::make_shared(), "The result part name to merge to obtained from the oldest MERGE_PARTS entry in the replication queue."}, + { "oldest_part_to_mutate_to", std::make_shared(), "The result part name to mutate to obtained from the oldest MUTATE_PARTS entry in the replication queue."}, + { "log_max_index", std::make_shared(), "Maximum entry number in the log of general activity."}, + { "log_pointer", std::make_shared(), "Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. " "If log_pointer is much smaller than log_max_index, something is wrong."}, + { "last_queue_update", std::make_shared(), "When the queue was last updated."}, + { "absolute_delay", std::make_shared(), "The lag of the current replica, in seconds."}, + { "total_replicas", std::make_shared(), "The total number of known replicas of this table."}, + { "active_replicas", std::make_shared(), "The number of replicas of this table that have a session in ClickHouse Keeper (i.e., the number of functioning replicas)."}, + { "lost_part_count", std::make_shared(), "The number of data parts lost in the table by all replicas in total since table creation. Value is persisted in ClickHouse Keeper and can only increase."}, + { "last_queue_update_exception", std::make_shared(), "When the queue contains broken entries.
Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions."}, + { "zookeeper_exception", std::make_shared(), "The last exception message, received if an error happened when fetching the info from ClickHouse Keeper."}, + { "replica_is_active", std::make_shared(std::make_shared(), std::make_shared()), "Map between replica name and whether the replica is active."} })); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemRowPolicies.cpp b/src/Storages/System/StorageSystemRowPolicies.cpp index ea819e88993..93c5ba60a7f 100644 --- a/src/Storages/System/StorageSystemRowPolicies.cpp +++ b/src/Storages/System/StorageSystemRowPolicies.cpp @@ -38,8 +38,8 @@ ColumnsDescription StorageSystemRowPolicies::getColumnsDescription() for (auto filter_type : collections::range(RowPolicyFilterType::MAX)) { - const String & column_name = RowPolicyFilterTypeInfo::get(filter_type).name; - description.add({column_name, std::make_shared(std::make_shared())}); + const auto & filter_type_info = RowPolicyFilterTypeInfo::get(filter_type); + description.add({filter_type_info.name, std::make_shared(std::make_shared()), filter_type_info.description}); } description.add({"is_restrictive", std::make_shared(), diff --git a/src/Storages/System/StorageSystemS3Queue.cpp b/src/Storages/System/StorageSystemS3Queue.cpp index 557f0fd1208..a6bb7da2b6e 100644 --- a/src/Storages/System/StorageSystemS3Queue.cpp +++ b/src/Storages/System/StorageSystemS3Queue.cpp @@ -25,14 +25,14 @@ ColumnsDescription StorageSystemS3Queue::getColumnsDescription() /// TODO: Fill in all the comments return ColumnsDescription { - {"zookeeper_path", std::make_shared()}, - {"file_name", std::make_shared()}, - {"rows_processed", std::make_shared()}, - {"status", std::make_shared()}, - {"processing_start_time", std::make_shared(std::make_shared())}, - {"processing_end_time", std::make_shared(std::make_shared())}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, - {"exception", std::make_shared()}, + {"zookeeper_path", std::make_shared(), "Path in ZooKeeper to the S3Queue metadata."}, + {"file_name", std::make_shared(), "Name of the file being processed by S3Queue."}, + {"rows_processed", std::make_shared(), "Number of rows processed so far."}, + {"status", std::make_shared(), "Status of processing: Processed, Processing, Failed."}, + {"processing_start_time", std::make_shared(std::make_shared()), "Time at which processing of the file started."}, + {"processing_end_time", std::make_shared(std::make_shared()), "Time at which processing of the file ended."}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected during processing of the file."}, + {"exception", std::make_shared(), "Exception that happened during processing."}, }; } diff --git a/src/Storages/System/StorageSystemSettingsChanges.cpp b/src/Storages/System/StorageSystemSettingsChanges.cpp index ecac76b73a3..de47ec52031 100644 --- a/src/Storages/System/StorageSystemSettingsChanges.cpp +++ b/src/Storages/System/StorageSystemSettingsChanges.cpp @@ -12,7 +12,7 @@ ColumnsDescription StorageSystemSettingsChanges::getColumnsDescription() /// TODO: Fill in all the comments return ColumnsDescription { - {"version", std::make_shared()}, + {"version", std::make_shared(), "The ClickHouse server version."}, {"changes", std::make_shared(std::make_shared( DataTypes{ @@ -20,7 +20,7 @@ ColumnsDescription
StorageSystemSettingsChanges::getColumnsDescription() std::make_shared(), std::make_shared(), std::make_shared()}, - Names{"name", "previous_value", "new_value", "reason"}))}, + Names{"name", "previous_value", "new_value", "reason"})), "The list of changes in settings which changed the behaviour of ClickHouse."}, }; } diff --git a/src/Storages/System/StorageSystemSettingsProfileElements.cpp b/src/Storages/System/StorageSystemSettingsProfileElements.cpp index 6ac5d13a249..2af3e6dfd05 100644 --- a/src/Storages/System/StorageSystemSettingsProfileElements.cpp +++ b/src/Storages/System/StorageSystemSettingsProfileElements.cpp @@ -42,7 +42,7 @@ ColumnsDescription StorageSystemSettingsProfileElements::getColumnsDescription() {"value", std::make_shared(std::make_shared()), "Setting value."}, {"min", std::make_shared(std::make_shared()), "The minimum value of the setting. NULL if not set."}, {"max", std::make_shared(std::make_shared()), "The maximum value of the setting. NULL if not set."}, - {"writability", std::make_shared(std::make_shared(getSettingConstraintWritabilityEnumValues()))}, + {"writability", std::make_shared(std::make_shared(getSettingConstraintWritabilityEnumValues())), "The property which shows whether a setting can be changed or not."}, {"inherit_profile", std::make_shared(std::make_shared()), "A parent profile for this setting profile. NULL if not set. " "Setting profile will inherit all the settings' values and constraints (min, max, readonly) from its parent profiles." diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index 74864bb50e1..ba7433fb9ae 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -507,11 +507,11 @@ StorageSystemStackTrace::StorageSystemStackTrace(const StorageID & table_id_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription({ - { "thread_name", std::make_shared() }, - { "thread_id", std::make_shared() }, - { "query_id", std::make_shared() }, - { "trace", std::make_shared(std::make_shared()) }, - }, { /* aliases */ })); + {"thread_name", std::make_shared(), "The name of the thread."}, + {"thread_id", std::make_shared(), "The thread identifier."}, + {"query_id", std::make_shared(), "The ID of the query this thread belongs to."}, + {"trace", std::make_shared(std::make_shared()), "The stacktrace of this thread.
Basically just an array of addresses."}, + })); setInMemoryMetadata(storage_metadata); notification_pipe.open(); diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index 39e99884e1d..21251136f7d 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -37,16 +37,16 @@ StorageSystemStoragePolicies::StorageSystemStoragePolicies(const StorageID & tab StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns( ColumnsDescription({ - {"policy_name", std::make_shared()}, - {"volume_name", std::make_shared()}, - {"volume_priority", std::make_shared()}, - {"disks", std::make_shared(std::make_shared())}, - {"volume_type", std::make_shared(getTypeEnumValues())}, - {"max_data_part_size", std::make_shared()}, - {"move_factor", std::make_shared()}, - {"prefer_not_to_merge", std::make_shared()}, - {"perform_ttl_move_on_insert", std::make_shared()}, - {"load_balancing", std::make_shared(getTypeEnumValues())} + {"policy_name", std::make_shared(), "The name of the storage policy."}, + {"volume_name", std::make_shared(), "The name of the volume."}, + {"volume_priority", std::make_shared(), "The priority of the volume."}, + {"disks", std::make_shared(std::make_shared()), "The list of all disks names which are a part of this storage policy."}, + {"volume_type", std::make_shared(getTypeEnumValues()), "The type of the volume - JBOD or a single disk."}, + {"max_data_part_size", std::make_shared(), "the maximum size of a part that can be stored on any of the volumes disks."}, + {"move_factor", std::make_shared(), "When the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1)."}, + {"prefer_not_to_merge", std::make_shared(), "You should not use this setting. Disables merging of data parts on this volume (this is harmful and leads to performance degradation)."}, + {"perform_ttl_move_on_insert", std::make_shared(), "Disables TTL move on data part INSERT. By default (if enabled) if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule."}, + {"load_balancing", std::make_shared(getTypeEnumValues()), "Policy for disk balancing, `round_robin` or `least_used`."} })); // TODO: Add string column with custom volume-type-specific options setInMemoryMetadata(storage_metadata); diff --git a/src/Storages/System/StorageSystemSymbols.cpp b/src/Storages/System/StorageSystemSymbols.cpp index 56195544448..6b313407e61 100644 --- a/src/Storages/System/StorageSystemSymbols.cpp +++ b/src/Storages/System/StorageSystemSymbols.cpp @@ -22,9 +22,9 @@ StorageSystemSymbols::StorageSystemSymbols(const StorageID & table_id_) StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription( { - {"symbol", std::make_shared()}, - {"address_begin", std::make_shared()}, - {"address_end", std::make_shared()}, + {"symbol", std::make_shared(), "Symbol name in the binary. It is mangled. 
You can apply demangle(symbol) to obtain a readable name."}, + {"address_begin", std::make_shared(), "Start address of the symbol in the binary."}, + {"address_end", std::make_shared(), "End address of the symbol in the binary."}, })); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 6904eba8ab5..639c1455b83 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -67,9 +67,9 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) "Total number of uncompressed bytes, if it's possible to quickly determine the exact number " "of bytes from the part checksums for the table on storage, otherwise NULL (does not take underlying storage (if any) into account)." }, - {"parts", std::make_shared(std::make_shared())}, - {"active_parts", std::make_shared(std::make_shared())}, - {"total_marks", std::make_shared(std::make_shared())}, + {"parts", std::make_shared(std::make_shared()), "The total number of parts in this table."}, + {"active_parts", std::make_shared(std::make_shared()), "The number of active parts in this table."}, + {"total_marks", std::make_shared(std::make_shared()), "The total number of marks in all parts in this table."}, {"lifetime_rows", std::make_shared(std::make_shared()), "Total number of rows INSERTed since server start (only for Buffer tables)." }, diff --git a/src/Storages/System/StorageSystemTransactions.cpp b/src/Storages/System/StorageSystemTransactions.cpp index edc3739e713..295d93edf7d 100644 --- a/src/Storages/System/StorageSystemTransactions.cpp +++ b/src/Storages/System/StorageSystemTransactions.cpp @@ -23,14 +23,13 @@ static DataTypePtr getStateEnumType() ColumnsDescription StorageSystemTransactions::getColumnsDescription() { - /// TODO: Fill in all the comments. return ColumnsDescription { - {"tid", getTransactionIDDataType()}, - {"tid_hash", std::make_shared()}, - {"elapsed", std::make_shared()}, - {"is_readonly", std::make_shared()}, - {"state", getStateEnumType()}, + {"tid", getTransactionIDDataType(), "The identifier of the transaction."}, + {"tid_hash", std::make_shared(), "The hash of the identifier."}, + {"elapsed", std::make_shared(), "The amount of time the transaction being processed."}, + {"is_readonly", std::make_shared(), "The flag which shows whether the transaction has executed any write operation."}, + {"state", getStateEnumType(), "The state of the transaction. Possible values: RUNNING, COMMITTING, COMMITTED, ROLLED_BACK."}, }; } diff --git a/src/Storages/System/StorageSystemUserDirectories.cpp b/src/Storages/System/StorageSystemUserDirectories.cpp index 7b6c1144ae1..1b3469e7597 100644 --- a/src/Storages/System/StorageSystemUserDirectories.cpp +++ b/src/Storages/System/StorageSystemUserDirectories.cpp @@ -11,13 +11,12 @@ namespace DB { ColumnsDescription StorageSystemUserDirectories::getColumnsDescription() { - /// TODO: Fill in all the comments. return ColumnsDescription { - {"name", std::make_shared()}, - {"type", std::make_shared()}, - {"params", std::make_shared()}, - {"precedence", std::make_shared()}, + {"name", std::make_shared(), "The name of the directory."}, + {"type", std::make_shared(), "The type of the access storage e.g. users.xml or replicated or memory etc."}, + {"params", std::make_shared(), "JSON with the parameters of the access storage."}, + {"precedence", std::make_shared(), "The order in which this directory is declared in the config. 
The same order is used when ClickHouse tries to find a user or role."}, }; } diff --git a/src/Storages/System/StorageSystemUserProcesses.cpp b/src/Storages/System/StorageSystemUserProcesses.cpp index d36129aea63..65fbeedf406 100644 --- a/src/Storages/System/StorageSystemUserProcesses.cpp +++ b/src/Storages/System/StorageSystemUserProcesses.cpp @@ -18,10 +18,10 @@ ColumnsDescription StorageSystemUserProcesses::getColumnsDescription() { auto description = ColumnsDescription { - {"user", std::make_shared()}, - {"memory_usage", std::make_shared()}, - {"peak_memory_usage", std::make_shared()}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, + {"user", std::make_shared(), "User name."}, + {"memory_usage", std::make_shared(), "Sum of RAM used by all processes of the user. It might not include some types of dedicated memory. See the max_memory_usage setting."}, + {"peak_memory_usage", std::make_shared(), "The peak of memory usage of the user. It can be reset when no queries are run for the user."}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Summary of ProfileEvents that measure different metrics for the user. The description of them could be found in the table system.events"}, }; description.setAliases({ diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index 4734aeaaa82..0c34f04844d 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -69,10 +69,10 @@ ColumnsDescription StorageSystemUsers::getColumnsDescription() {"default_roles_except", std::make_shared(std::make_shared()), "All the granted roles set as default excepting of the listed ones." }, - {"grantees_any", std::make_shared()}, - {"grantees_list", std::make_shared(std::make_shared())}, - {"grantees_except", std::make_shared(std::make_shared())}, - {"default_database", std::make_shared()}, + {"grantees_any", std::make_shared(), "The flag that indicates whether a user with any grant option can grant it to anyone."}, + {"grantees_list", std::make_shared(std::make_shared()), "The list of users or roles to which this user is allowed to grant options to."}, + {"grantees_except", std::make_shared(std::make_shared()), "The list of users or roles to which this user is forbidden from grant options to."}, + {"default_database", std::make_shared(), "The name of the default database for this user."}, }; } diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp index a0ed1d3c3d3..30539ed6b6a 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.cpp +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -36,15 +36,15 @@ ColumnsDescription StorageSystemViewRefreshes::getColumnsDescription() }, {"refresh_count", std::make_shared(), "Number of successful refreshes since last server restart or table creation."}, {"progress", std::make_shared(), "Progress of the current refresh, between 0 and 1."}, - {"elapsed", std::make_shared()}, - {"read_rows", std::make_shared(), "Number of rows read by the current refresh so far."}, - {"read_bytes", std::make_shared()}, + {"elapsed", std::make_shared(), "The amount of nanoseconds the current refresh took."}, + {"read_rows", std::make_shared(), "Number of rows read during the current refresh."}, + {"read_bytes", std::make_shared(), "Number of bytes read during the current refresh."}, {"total_rows", std::make_shared(), "Estimated total number of rows that need to be read by the 
current refresh."}, - {"total_bytes", std::make_shared()}, - {"written_rows", std::make_shared()}, - {"written_bytes", std::make_shared()}, - {"result_rows", std::make_shared()}, - {"result_bytes", std::make_shared()}, + {"total_bytes", std::make_shared(), "Estimated total number of bytes that need to be read by the current refresh."}, + {"written_rows", std::make_shared(), "Number of rows written during the current refresh."}, + {"written_bytes", std::make_shared(), "Number rof bytes written during the current refresh."}, + {"result_rows", std::make_shared(), "Estimated total number of rows in the result set of the SELECT query."}, + {"result_bytes", std::make_shared(), "Estimated total number of bytes in the result set of the SELECT query."}, }; } diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 2845e194614..d1bf86ba8ef 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -233,9 +233,9 @@ private: StorageSystemZooKeeper::StorageSystemZooKeeper(const StorageID & table_id_) : IStorage(table_id_) { - StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(getColumnsDescription()); - setInMemoryMetadata(storage_metadata); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(getColumnsDescription()); + setInMemoryMetadata(storage_metadata); } void StorageSystemZooKeeper::read( @@ -248,7 +248,7 @@ void StorageSystemZooKeeper::read( size_t max_block_size, size_t /*num_streams*/) { - auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtualsList()); auto read_step = std::make_unique( column_names, query_info, diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 386dcd17733..22466b3d4c2 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -109,23 +109,28 @@ void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context) } } -NamesAndTypesList getPathFileAndSizeVirtualsForStorage(NamesAndTypesList storage_columns) +NameSet getVirtualNamesForFileLikeStorage() { - auto default_virtuals = NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}, - {"_size", makeNullable(std::make_shared())}}; + return {"_path", "_file", "_size"}; +} - default_virtuals.sort(); - storage_columns.sort(); +VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns) +{ + VirtualColumnsDescription desc; - NamesAndTypesList result_virtuals; - std::set_difference( - default_virtuals.begin(), default_virtuals.end(), storage_columns.begin(), storage_columns.end(), - std::back_inserter(result_virtuals), - [](const NameAndTypePair & lhs, const NameAndTypePair & rhs){ return lhs.name < rhs.name; }); + auto add_virtual = [&](const auto & name, const auto & type) + { + if (storage_columns.has(name)) + return; - return result_virtuals; + desc.addEphemeral(name, type, ""); + }; + + add_virtual("_path", std::make_shared(std::make_shared())); + add_virtual("_file", std::make_shared(std::make_shared())); + add_virtual("_size", makeNullable(std::make_shared())); + + return desc; } static void addPathAndFileToVirtualColumns(Block & block, const String & path, size_t idx) diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 
diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 3e7299b4a63..b5526fc5c7f 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -39,7 +40,8 @@ auto extractSingleValueFromBlock(const Block & block, const String & name) return res; } -NamesAndTypesList getPathFileAndSizeVirtualsForStorage(NamesAndTypesList storage_columns); +NameSet getVirtualNamesForFileLikeStorage(); +VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns); ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns); diff --git a/src/Storages/VirtualColumnsDescription.cpp b/src/Storages/VirtualColumnsDescription.cpp new file mode 100644 index 00000000000..64097224ed9 --- /dev/null +++ b/src/Storages/VirtualColumnsDescription.cpp @@ -0,0 +1,94 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int DUPLICATE_COLUMN; + extern const int NO_SUCH_COLUMN_IN_TABLE; +} + +VirtualColumnDescription::VirtualColumnDescription( + String name_, DataTypePtr type_, ASTPtr codec_, String comment_, VirtualsKind kind_) + : ColumnDescription(std::move(name_), std::move(type_), std::move(codec_), std::move(comment_)) + , kind(kind_) +{ +} + +void VirtualColumnsDescription::add(VirtualColumnDescription desc) +{ + if (container.get<1>().contains(desc.name)) + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Virtual column {} already exists", desc.name); + + container.get<0>().push_back(std::move(desc)); +} + +void VirtualColumnsDescription::addEphemeral(String name, DataTypePtr type, String comment) +{ + add({std::move(name), std::move(type), nullptr, std::move(comment), VirtualsKind::Ephemeral}); +} + +void VirtualColumnsDescription::addPersistent(String name, DataTypePtr type, ASTPtr codec, String comment) +{ + add({std::move(name), std::move(type), std::move(codec), std::move(comment), VirtualsKind::Persistent}); +} + +std::optional VirtualColumnsDescription::tryGet(const String & name, VirtualsKind kind) const +{ + auto it = container.get<1>().find(name); + if (it != container.get<1>().end() && (static_cast(it->kind) & static_cast(kind))) + return NameAndTypePair{it->name, it->type}; + return {}; +} + +NameAndTypePair VirtualColumnsDescription::get(const String & name, VirtualsKind kind) const +{ + auto column = tryGet(name, kind); + if (!column) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no virtual column {}", name); + return *column; +} + +const VirtualColumnDescription * VirtualColumnsDescription::tryGetDescription(const String & name, VirtualsKind kind) const +{ + auto it = container.get<1>().find(name); + if (it != container.get<1>().end() && (static_cast(it->kind) & static_cast(kind))) + return &(*it); + return nullptr; +} + +const VirtualColumnDescription & VirtualColumnsDescription::getDescription(const String & name, VirtualsKind kind) const +{ + const auto * column = tryGetDescription(name, kind); + if (!column) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no virtual column {}", name); + return *column; +} + +Block VirtualColumnsDescription::getSampleBlock() const +{ + Block result; + for (const auto & desc : container) + result.insert({desc.type->createColumn(), desc.type, desc.name}); + return result; +} + +NamesAndTypesList VirtualColumnsDescription::getNamesAndTypesList() const +{ + NamesAndTypesList result; + for (const auto & desc : container) + result.emplace_back(desc.name, desc.type); + return result; +}
+NamesAndTypesList VirtualColumnsDescription::getNamesAndTypesList(VirtualsKind kind) const +{ + NamesAndTypesList result; + for (const auto & column : container) + if (static_cast(column.kind) & static_cast(kind)) + result.emplace_back(column.name, column.type); + return result; +} + +} diff --git a/src/Storages/VirtualColumnsDescription.h b/src/Storages/VirtualColumnsDescription.h new file mode 100644 index 00000000000..e11ba706718 --- /dev/null +++ b/src/Storages/VirtualColumnsDescription.h @@ -0,0 +1,71 @@ +#pragma once +#include +#include + +namespace DB +{ + +struct VirtualColumnDescription : public ColumnDescription +{ + using Self = VirtualColumnDescription; + VirtualsKind kind; + + VirtualColumnDescription() = default; + VirtualColumnDescription(String name_, DataTypePtr type_, ASTPtr codec_, String comment_, VirtualsKind kind_); + + bool isEphemeral() const { return kind == VirtualsKind::Ephemeral; } + bool isPersistent() const { return kind == VirtualsKind::Persistent; } + + /// This method is needed for boost::multi_index because field + /// of base class cannot be referenced in boost::multi_index::member. + const String & getName() const { return name; } +}; + +class VirtualColumnsDescription +{ +public: + using Container = boost::multi_index_container< + VirtualColumnDescription, + boost::multi_index::indexed_by< + boost::multi_index::sequenced<>, + boost::multi_index::ordered_unique>>>; + + using const_iterator = Container::const_iterator; + + const_iterator begin() const { return container.begin(); } + const_iterator end() const { return container.end(); } + + VirtualColumnsDescription() = default; + + void add(VirtualColumnDescription desc); + void addEphemeral(String name, DataTypePtr type, String comment); + void addPersistent(String name, DataTypePtr type, ASTPtr codec, String comment); + + size_t size() const { return container.size(); } + bool empty() const { return container.empty(); } + bool has(const String & name) const { return container.get<1>().contains(name); } + + NameAndTypePair get(const String & name, VirtualsKind kind) const; + std::optional tryGet(const String & name, VirtualsKind kind) const; + + NameAndTypePair get(const String & name) const { return get(name, VirtualsKind::All); } + std::optional tryGet(const String & name) const { return tryGet(name, VirtualsKind::All); } + + const VirtualColumnDescription * tryGetDescription(const String & name, VirtualsKind kind) const; + const VirtualColumnDescription & getDescription(const String & name, VirtualsKind kind) const; + + const VirtualColumnDescription * tryGetDescription(const String & name) const { return tryGetDescription(name, VirtualsKind::All); } + const VirtualColumnDescription & getDescription(const String & name) const { return getDescription(name, VirtualsKind::All); } + + Block getSampleBlock() const; + NamesAndTypesList getNamesAndTypesList() const; + NamesAndTypesList getNamesAndTypesList(VirtualsKind kind) const; + +private: + Container container; +}; + +using VirtualsDescriptionPtr = std::shared_ptr; +using MultiVersionVirtualsDescriptionPtr = MultiVersion; + +}
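Editorial note: a short, hedged usage sketch of the container introduced above, to make the API concrete. Everything here is inferred from the declarations in this diff; the concrete data types and the callee are assumptions:

    // Hypothetical illustration; not a hunk from this PR.
    VirtualColumnsDescription virtuals;
    virtuals.addEphemeral("_path", std::make_shared<DataTypeString>(), "Path to the file.");
    virtuals.addEphemeral("_size", makeNullable(std::make_shared<DataTypeUInt64>()), "File size.");

    // Lookups can be filtered by VirtualsKind; tryGet defaults to VirtualsKind::All.
    if (auto column = virtuals.tryGet("_path"))
        doSomethingWith(column->name, column->type); // hypothetical callee

    // add() throws DUPLICATE_COLUMN on a repeated name, and getSampleBlock()
    // returns a Block with one empty column per registered virtual.
    Block sample = virtuals.getSampleBlock();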
diff --git a/src/Storages/prepareReadingFromFormat.cpp b/src/Storages/prepareReadingFromFormat.cpp index 6be4213ec6b..406b7f379f9 100644 --- a/src/Storages/prepareReadingFromFormat.cpp +++ b/src/Storages/prepareReadingFromFormat.cpp @@ -4,7 +4,7 @@ namespace DB { -ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns, const NamesAndTypesList & virtuals) +ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns) { ReadFromFormatInfo info; /// Collect requested virtual columns and remove them from requested columns. @@ -12,11 +12,11 @@ ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, c for (const auto & column_name : requested_columns) { bool is_virtual = false; - for (const auto & virtual_column : virtuals) + for (const auto & virtual_column : *storage_snapshot->virtual_columns) { if (column_name == virtual_column.name) { - info.requested_virtual_columns.push_back(virtual_column); + info.requested_virtual_columns.emplace_back(virtual_column.name, virtual_column.type); is_virtual = true; break; } diff --git a/src/Storages/prepareReadingFromFormat.h b/src/Storages/prepareReadingFromFormat.h index c5f3959a550..e4d62c29ec6 100644 --- a/src/Storages/prepareReadingFromFormat.h +++ b/src/Storages/prepareReadingFromFormat.h @@ -22,5 +22,5 @@ namespace DB }; /// Get all needed information for reading from data in some input format. - ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns, const NamesAndTypesList & virtuals); + ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns); } diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index b697f3df925..1a58be4f75b 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -7,6 +7,7 @@ #include #include +#include #include @@ -37,6 +38,11 @@ bool ITableFunctionFileLike::supportsReadingSubsetOfColumns(const ContextPtr & c return format != "auto" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format, context); } +NameSet ITableFunctionFileLike::getVirtualsToCheckBeforeUsingStructureHint() const +{ + return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); +} + void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, ContextPtr context) { /// Parse args diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index c8412905e44..ba1b7d2bb3f 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -1,6 +1,7 @@ #pragma once #include +#include "Core/Names.h" #include "Parsers/IAST_fwd.h" namespace DB @@ -29,6 +30,8 @@ public: bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; + NameSet getVirtualsToCheckBeforeUsingStructureHint() const override; + static size_t getMaxNumberOfArguments() { return 4; } static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr &); diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index 8f558adb09b..275cd2a9cbb 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "registerTableFunctions.h" #include @@ -348,8 +349,7 @@ bool TableFunctionAzureBlobStorage::supportsReadingSubsetOfColumns(const Context std::unordered_set
TableFunctionAzureBlobStorage::getVirtualsToCheckBeforeUsingStructureHint() const { - auto virtual_column_names = StorageAzureBlob::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; + return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); } StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const diff --git a/src/TableFunctions/TableFunctionExecutable.cpp b/src/TableFunctions/TableFunctionExecutable.cpp index 9fa5ddf15c2..2c3802e8667 100644 --- a/src/TableFunctions/TableFunctionExecutable.cpp +++ b/src/TableFunctions/TableFunctionExecutable.cpp @@ -71,6 +71,9 @@ std::vector TableFunctionExecutable::skipAnalysisForArguments(const Quer const auto & table_function_node_arguments = table_function_node.getArguments().getNodes(); size_t table_function_node_arguments_size = table_function_node_arguments.size(); + if (table_function_node_arguments_size <= 2) + return {}; + std::vector result_indexes; result_indexes.reserve(table_function_node_arguments_size - 2); for (size_t i = 2; i < table_function_node_arguments_size; ++i) diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index b481076e9b6..28bf72e07fb 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -112,12 +112,6 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context return parseColumnsListFromString(structure, context); } -std::unordered_set TableFunctionFile::getVirtualsToCheckBeforeUsingStructureHint() const -{ - auto virtual_column_names = StorageFile::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; -} - void registerTableFunctionFile(TableFunctionFactory & factory) { factory.registerFunction(); diff --git a/src/TableFunctions/TableFunctionFile.h b/src/TableFunctions/TableFunctionFile.h index c1924028b49..aaf5ba8873a 100644 --- a/src/TableFunctions/TableFunctionFile.h +++ b/src/TableFunctions/TableFunctionFile.h @@ -22,8 +22,6 @@ public: ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - protected: int fd = -1; String path_to_archive; diff --git a/src/TableFunctions/TableFunctionHDFS.cpp b/src/TableFunctions/TableFunctionHDFS.cpp index 2dac4398144..45829245551 100644 --- a/src/TableFunctions/TableFunctionHDFS.cpp +++ b/src/TableFunctions/TableFunctionHDFS.cpp @@ -41,12 +41,6 @@ ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context return parseColumnsListFromString(structure, context); } -std::unordered_set TableFunctionHDFS::getVirtualsToCheckBeforeUsingStructureHint() const -{ - auto virtual_column_names = StorageHDFS::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; -} - void registerTableFunctionHDFS(TableFunctionFactory & factory) { factory.registerFunction(); diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h index 3a719496b26..f1c0b8a7eae 100644 --- a/src/TableFunctions/TableFunctionHDFS.h +++ b/src/TableFunctions/TableFunctionHDFS.h @@ -36,8 +36,6 @@ public: ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - std::unordered_set 
getVirtualsToCheckBeforeUsingStructureHint() const override; - private: StoragePtr getStorage( const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index c00b1e2e3e5..a8c100ebd44 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "registerTableFunctions.h" #include @@ -401,8 +402,7 @@ bool TableFunctionS3::supportsReadingSubsetOfColumns(const ContextPtr & context) std::unordered_set TableFunctionS3::getVirtualsToCheckBeforeUsingStructureHint() const { - auto virtual_column_names = StorageS3::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; + return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); } StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool /*is_insert_query*/) const diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index a78b2affa9a..2bdc0b449e0 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -159,12 +159,6 @@ ColumnsDescription TableFunctionURL::getActualTableStructure(ContextPtr context, return parseColumnsListFromString(structure, context); } -std::unordered_set TableFunctionURL::getVirtualsToCheckBeforeUsingStructureHint() const -{ - auto virtual_column_names = StorageURL::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; -} - std::optional TableFunctionURL::tryGetFormatFromFirstArgument() { return FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(filename).getPath()); diff --git a/src/TableFunctions/TableFunctionURL.h b/src/TableFunctions/TableFunctionURL.h index 54e223283ba..a1efddb84c6 100644 --- a/src/TableFunctions/TableFunctionURL.h +++ b/src/TableFunctions/TableFunctionURL.h @@ -36,8 +36,6 @@ public: static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure_, const String & format_, const ContextPtr & context); - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - protected: void parseArguments(const ASTPtr & ast, ContextPtr context) override; void parseArgumentsImpl(ASTs & args, const ContextPtr & context) override; diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 858b45731a4..31527dc3476 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -2,4 +2,3 @@ test_build_sets_from_multiple_threads/test.py::test_set test_concurrent_backups_s3/test.py::test_concurrent_backups test_distributed_type_object/test.py::test_distributed_type_object test_merge_table_over_distributed/test.py::test_global_in -test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 6b754787d5e..1eec9a6771b 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1111,7 +1111,7 @@ def _configure_jobs( digests: Dict[str, str] = {} print("::group::Job Digests") - for job in CI_CONFIG.job_generator(): + for job in CI_CONFIG.job_generator(pr_info.head_ref): digest = job_digester.get_job_digest(CI_CONFIG.get_digest_config(job)) digests[job] = digest print(f" job 
[{job.rjust(50)}] has digest [{digest}]") diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 44dea116cbe..df8bfb1c2a8 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -11,6 +11,14 @@ from ci_utils import WithIter from integration_test_images import IMAGES +class WorkFlows(metaclass=WithIter): + PULL_REQUEST = "PULL_REQUEST" + MASTER = "MASTER" + BACKPORT = "BACKPORT" + RELEASE = "RELEASE" + SYNC = "SYNC" + + class CIStages(metaclass=WithIter): NA = "UNKNOWN" BUILDS_1 = "Builds_1" @@ -694,10 +702,11 @@ class CIConfig: ), f"Invalid check_name or CI_CONFIG outdated, config not found for [{check_name}]" return res # type: ignore - def job_generator(self) -> Iterable[str]: + def job_generator(self, branch: str) -> Iterable[str]: """ traverses all check names in CI pipeline """ + assert branch for config in ( self.other_jobs_configs, self.build_config, diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index e5268947304..eebc846f4b1 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -11,7 +11,7 @@ from commit_status_helper import ( get_commit, get_commit_filtered_statuses, post_commit_status, - update_mergeable_check, + trigger_mergeable_check, ) from get_robot_token import get_best_robot_token from pr_info import PRInfo @@ -24,14 +24,11 @@ def main(): pr_info = PRInfo(need_orgs=True) gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - # Update the Mergeable Check at the final step - update_mergeable_check(commit, pr_info, CI_STATUS_NAME) + # Unconditionally update the Mergeable Check at the final step + statuses = get_commit_filtered_statuses(commit) + trigger_mergeable_check(commit, statuses) - statuses = [ - status - for status in get_commit_filtered_statuses(commit) - if status.context == CI_STATUS_NAME - ] + statuses = [s for s in statuses if s.context == CI_STATUS_NAME] if not statuses: return # Take the latest status diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index d004f3ed215..d0565e136d3 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import argparse +from concurrent.futures import ProcessPoolExecutor import csv import logging import os @@ -119,7 +120,7 @@ def checkout_last_ref(pr_info: PRInfo) -> None: def main(): logging.basicConfig(level=logging.INFO) logging.getLogger("git_helper").setLevel(logging.DEBUG) - args = parse_args() + # args = parse_args() stopwatch = Stopwatch() @@ -127,28 +128,46 @@ def main(): temp_path = Path(TEMP_PATH) temp_path.mkdir(parents=True, exist_ok=True) - pr_info = PRInfo() + # pr_info = PRInfo() IMAGE_NAME = "clickhouse/style-test" image = pull_image(get_docker_image(IMAGE_NAME)) - cmd = ( + cmd_1 = ( f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE " f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output " - f"{image}" + f"--entrypoint= -w/ClickHouse/utils/check-style " + f"{image} ./check_cpp_docs.sh" ) + cmd_2 = ( + f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE " + f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output " + f"--entrypoint= -w/ClickHouse/utils/check-style " + f"{image} ./check_py.sh" + ) + logging.info("Is going to run the command: %s", cmd_1) + logging.info("Is going to run the command: %s", cmd_2) - if args.push: - checkout_head(pr_info) + with ProcessPoolExecutor(max_workers=2) as executor: + # Submit commands for execution in parallel + future1 = 
executor.submit(subprocess.run, cmd_1, shell=True) + future2 = executor.submit(subprocess.run, cmd_2, shell=True) + # Wait for both commands to complete + _ = future1.result() + _ = future2.result() + + # if args.push: + # checkout_head(pr_info) - logging.info("Is going to run the command: %s", cmd) subprocess.check_call( - cmd, + f"python3 ../../utils/check-style/process_style_check_result.py --in-results-dir {temp_path} " + f"--out-results-file {temp_path}/test_results.tsv --out-status-file {temp_path}/check_status.tsv || " + f'echo -e "failure\tCannot parse results" > {temp_path}/check_status.tsv', shell=True, ) - if args.push: - commit_push_staged(pr_info) - checkout_last_ref(pr_info) + # if args.push: + # commit_push_staged(pr_info) + # checkout_last_ref(pr_info) state, description, test_results, additional_files = process_result(temp_path) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index b47f86a843d..bd80ac25468 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -508,12 +508,9 @@ def test_alters_from_different_replicas(started_cluster): dummy_node.stop_clickhouse(kill=True) settings = {"distributed_ddl_task_timeout": 5} - assert ( - "There are 1 unfinished hosts (0 of them are currently executing the task" - in competing_node.query_and_get_error( - "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN Added0 UInt32;", - settings=settings, - ) + assert "is not finished on 1 of 3 hosts" in competing_node.query_and_get_error( + "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN Added0 UInt32;", + settings=settings, ) settings = { "distributed_ddl_task_timeout": 5, diff --git a/tests/integration/test_replicated_database_cluster_groups/test.py b/tests/integration/test_replicated_database_cluster_groups/test.py index 647626d8014..91361c1850b 100644 --- a/tests/integration/test_replicated_database_cluster_groups/test.py +++ b/tests/integration/test_replicated_database_cluster_groups/test.py @@ -95,12 +95,9 @@ def test_cluster_groups(started_cluster): # Exception main_node_2.stop_clickhouse() settings = {"distributed_ddl_task_timeout": 5} - assert ( - "There are 1 unfinished hosts (0 of them are currently executing the task)" - in main_node_1.query_and_get_error( - "CREATE TABLE cluster_groups.table_2 (d Date, k UInt64) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);", - settings=settings, - ) + assert "is not finished on 1 of 2 hosts" in main_node_1.query_and_get_error( + "CREATE TABLE cluster_groups.table_2 (d Date, k UInt64) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);", + settings=settings, ) # 3. 
After start both groups are synced diff --git a/tests/performance/multiif.xml b/tests/performance/multiif.xml index ad56ab3f5f2..0c2d95cc553 100644 --- a/tests/performance/multiif.xml +++ b/tests/performance/multiif.xml @@ -5,4 +5,12 @@ select count(1) from test_multiif_t where multiIf(d > 2, d-2, d > 1, d-1, d >0, d, 0) > 1 SETTINGS max_threads=1 DROP TABLE IF EXISTS test_multiif_t + + + + SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf( rand(1) % 2 = 0, materialize(1::Nullable(Decimal256(3))), rand(2) % 2 = 0, materialize(2::Nullable(Decimal256(3))), rand(3) % 2 = 0, materialize(3::Nullable(Decimal256(3))), rand(4) % 2 = 0, materialize(4::Nullable(Decimal256(3))), rand(5) % 2 = 0, materialize(5::Nullable(Decimal256(3))), materialize(6::Nullable(Decimal256(3))))) + SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf( rand(1) % 2 = 0, materialize(1::Decimal256(3)), rand(2) % 2 = 0, materialize(2::Decimal256(3)), rand(3) % 2 = 0, materialize(3::Decimal256(3)), rand(4) % 2 = 0, materialize(4::Decimal256(3)), rand(5) % 2 = 0, materialize(5::Decimal256(3)), materialize(6::Decimal256(3)))) + + + SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf(rand() % 2 = 0, rand()+2, rand() % 3 = 0, rand()+3, rand() % 4 = 0, rand()+4, rand() % 5 = 0, rand() + 5, rand() % 6 = 0, rand() + 6, rand())) diff --git a/tests/queries/0_stateless/00597_push_down_predicate_long.reference b/tests/queries/0_stateless/00597_push_down_predicate_long.reference index 04b0432c0a9..2c46edc98bf 100644 --- a/tests/queries/0_stateless/00597_push_down_predicate_long.reference +++ b/tests/queries/0_stateless/00597_push_down_predicate_long.reference @@ -114,7 +114,7 @@ FROM ( SELECT 1 AS id, - identity(_CAST(1, \'Nullable(UInt8)\')) AS subquery + __scalarSubqueryResult(_CAST(1, \'Nullable(UInt8)\')) AS subquery WHERE subquery = 1 ) WHERE subquery = 1 diff --git a/tests/queries/0_stateless/01029_early_constant_folding.reference b/tests/queries/0_stateless/01029_early_constant_folding.reference index abcb2ddc6a7..4df5414ba4a 100644 --- a/tests/queries/0_stateless/01029_early_constant_folding.reference +++ b/tests/queries/0_stateless/01029_early_constant_folding.reference @@ -2,7 +2,7 @@ SELECT 1 WHERE 0 SELECT 1 SELECT 1 -WHERE (1 IN (0, 2)) AND (2 = (identity(_CAST(2, \'Nullable(UInt8)\')) AS subquery)) +WHERE (1 IN (0, 2)) AND (2 = (__scalarSubqueryResult(_CAST(2, \'Nullable(UInt8)\')) AS subquery)) SELECT 1 WHERE 1 IN (( SELECT arrayJoin([1, 2, 3]) diff --git a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference index b9a66a1e1a9..2151328d8b7 100644 --- a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference +++ b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference @@ -3,7 +3,7 @@ Received exception from server: Code: 57. Error: Received from localhost:9000. Error: There was an error on [localhost:9000]: Code: 57. Error: Table default.none already exists. (TABLE_ALREADY_EXISTS) (query: create table none on cluster test_shard_localhost (n int) engine=Memory;) Received exception from server: -Code: 159. Error: Received from localhost:9000. Error: Watching task is executing longer than distributed_ddl_task_timeout (=1) seconds. There are 1 unfinished hosts (0 of them are currently executing the task), they are going to execute the query in background. (TIMEOUT_EXCEEDED) +Code: 159. Error: Received from localhost:9000. 
Error: Distributed DDL task is not finished on 1 of 2 hosts (0 of them are currently executing the task, 0 are inactive). They are going to execute the query in background. Was waiting for seconds, which is longer than distributed_ddl_task_timeout. (TIMEOUT_EXCEEDED) (query: drop table if exists none on cluster test_unavailable_shard;) throw localhost 9000 0 0 0 @@ -12,7 +12,7 @@ Code: 57. Error: Received from localhost:9000. Error: There was an error on [loc (query: create table throw on cluster test_shard_localhost (n int) engine=Memory format Null;) localhost 9000 0 1 0 Received exception from server: -Code: 159. Error: Received from localhost:9000. Error: Watching task is executing longer than distributed_ddl_task_timeout (=1) seconds. There are 1 unfinished hosts (0 of them are currently executing the task), they are going to execute the query in background. (TIMEOUT_EXCEEDED) +Code: 159. Error: Received from localhost:9000. Error: Distributed DDL task is not finished on 1 of 2 hosts (0 of them are currently executing the task, 0 are inactive). They are going to execute the query in background. Was waiting for seconds, which is longer than distributed_ddl_task_timeout. (TIMEOUT_EXCEEDED) (query: drop table if exists throw on cluster test_unavailable_shard;) null_status_on_timeout localhost 9000 0 0 0 diff --git a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh index 12e142adda9..f17e85da60a 100755 --- a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh +++ b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh @@ -54,14 +54,14 @@ $CLIENT --distributed_ddl_output_mode=none -q "create table none on cluster test $CLIENT --distributed_ddl_output_mode=none -q "create table none on cluster test_shard_localhost (n int) engine=Memory;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" # Timeout -run_until_out_contains 'There are 1 unfinished hosts' $CLICKHOUSE_CLIENT_WITH_SETTINGS --distributed_ddl_output_mode=none -q "drop table if exists none on cluster test_unavailable_shard;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" | sed "s/Watching task .* is executing longer/Watching task is executing longer/" +run_until_out_contains 'not finished on 1 ' $CLICKHOUSE_CLIENT_WITH_SETTINGS --distributed_ddl_output_mode=none -q "drop table if exists none on cluster test_unavailable_shard;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" | sed "s/Distributed DDL task .* is not finished/Distributed DDL task is not finished/" | sed "s/for .* seconds/for seconds/" $CLIENT --distributed_ddl_output_mode=throw -q "select value from system.settings where name='distributed_ddl_output_mode';" $CLIENT --distributed_ddl_output_mode=throw -q "create table throw on cluster test_shard_localhost (n int) engine=Memory;" $CLIENT --distributed_ddl_output_mode=throw -q "create table throw on cluster test_shard_localhost (n int) engine=Memory format Null;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" -run_until_out_contains 'There are 1 unfinished hosts' $CLICKHOUSE_CLIENT_WITH_SETTINGS --distributed_ddl_output_mode=throw -q "drop table if exists throw on cluster test_unavailable_shard;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" | sed "s/Watching task .* is executing longer/Watching task is executing longer/" +run_until_out_contains 'not finished on 1 ' $CLICKHOUSE_CLIENT_WITH_SETTINGS --distributed_ddl_output_mode=throw -q 
"drop table if exists throw on cluster test_unavailable_shard;" 2>&1 | sed "s/DB::Exception/Error/g" | sed "s/ (version.*)//" | sed "s/Distributed DDL task .* is not finished/Distributed DDL task is not finished/" | sed "s/for .* seconds/for seconds/" $CLIENT --distributed_ddl_output_mode=null_status_on_timeout -q "select value from system.settings where name='distributed_ddl_output_mode';" diff --git a/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql index c1cec6ea212..808eaf291d5 100644 --- a/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql +++ b/tests/queries/0_stateless/01281_parseDateTime64BestEffort.sql @@ -7,7 +7,7 @@ SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 'bar'); -- {ser SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 4); -- {serverError 43} -- invalid timezone parameter SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, 'baz'); -- {serverError BAD_ARGUMENTS} -- unknown timezone -SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', materialize(3), 4); -- {serverError 44} -- non-const precision +SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', materialize(3), 4); -- {serverError 43, 44} -- non-const precision SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184Z', 3, materialize('UTC')); -- {serverError 44} -- non-const timezone SELECT parseDateTime64BestEffort('2020-05-14T03:37:03.253184012345678910111213141516171819Z', 3, 'UTC'); -- {serverError 6} diff --git a/tests/queries/0_stateless/01611_constant_folding_subqueries.reference b/tests/queries/0_stateless/01611_constant_folding_subqueries.reference index c3df2314112..327a4694aa8 100644 --- a/tests/queries/0_stateless/01611_constant_folding_subqueries.reference +++ b/tests/queries/0_stateless/01611_constant_folding_subqueries.reference @@ -5,7 +5,13 @@ SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) FO 1,10 EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); SELECT - identity(_CAST(0, \'Nullable(UInt64)\')) AS n, + __scalarSubqueryResult(_CAST(0, \'Nullable(UInt64)\')) AS n, toUInt64(10 / n) SELECT * FROM (WITH (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) as q SELECT * FROM system.one WHERE q > 0); 0 +SELECT * FROM (SELECT (SELECT '\d[a-z]') AS n, extractAll('5abc', assumeNotNull(n))) FORMAT CSV; +"\d[a-z]","['5a']" +EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); +SELECT + __scalarSubqueryResult(_CAST(0, \'Nullable(UInt64)\')) AS n, + toUInt64(10 / n) diff --git a/tests/queries/0_stateless/01611_constant_folding_subqueries.sql b/tests/queries/0_stateless/01611_constant_folding_subqueries.sql index 59f057d1ec5..b30fb43f621 100644 --- a/tests/queries/0_stateless/01611_constant_folding_subqueries.sql +++ b/tests/queries/0_stateless/01611_constant_folding_subqueries.sql @@ -3,3 +3,6 @@ SELECT * FROM (SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUI SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) FORMAT CSV; EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); SELECT * FROM (WITH (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) as q SELECT * FROM system.one WHERE q > 0); + +SELECT * FROM (SELECT (SELECT '\d[a-z]') AS n, extractAll('5abc', assumeNotNull(n))) FORMAT CSV; +EXPLAIN SYNTAX SELECT (SELECT * 
FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); diff --git a/tests/queries/0_stateless/01848_partition_value_column.sql b/tests/queries/0_stateless/01848_partition_value_column.sql index 28d842af3e9..de5e766c92c 100644 --- a/tests/queries/0_stateless/01848_partition_value_column.sql +++ b/tests/queries/0_stateless/01848_partition_value_column.sql @@ -14,8 +14,8 @@ select count() from tbl where _partition_value.3 = 4 settings max_rows_to_read = create table tbl2(i int) engine MergeTree order by i; insert into tbl2 values (1); -select _partition_value from tbl2; -- { serverError 16 } -select _partition_value from tbl2 group by 1; -- { serverError 16 } +select _partition_value from tbl2; -- { serverError UNKNOWN_IDENTIFIER } +select _partition_value from tbl2 group by 1; -- { serverError UNKNOWN_IDENTIFIER } drop table tbl; drop table tbl2; diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql index 97f6c049705..64d9b9db331 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element.sql @@ -19,7 +19,7 @@ SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMEN SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } SELECT t2.1 FROM t_tuple_element; EXPLAIN SYNTAX SELECT t2.1 FROM t_tuple_element; @@ -31,7 +31,7 @@ SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMEN SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } DROP TABLE t_tuple_element; diff --git a/tests/queries/0_stateless/02227_union_match_by_name.reference b/tests/queries/0_stateless/02227_union_match_by_name.reference index c28035fab49..d726ae86de7 100644 --- a/tests/queries/0_stateless/02227_union_match_by_name.reference +++ b/tests/queries/0_stateless/02227_union_match_by_name.reference @@ -36,7 +36,7 @@ Header: avgWeighted(x, y) Nullable(Float64) Header: x Nullable(Nothing) y UInt8 Expression (Projection) - Header: NULL_Nullable(Nothing) Nullable(Nothing) + Header: _CAST(NULL_Nullable(Nothing), \'Nullable(Nothing)\'_String) Nullable(Nothing) 1_UInt8 UInt8 Expression (Change column names to column identifiers) Header: __table5.dummy UInt8 diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference index 2455f50b7f2..e88abb35ab4 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference +++ 
b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.reference @@ -50,6 +50,8 @@ SOME GRANULES FILTERED OUT 100002 foo PREWHERE 301408 164953047376 164953047376 +335872 166463369216 166463369216 +301407 164952947376 164952947376 42 10042 20042 diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql index 73ae6eb499f..5af6565c03d 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql @@ -52,7 +52,7 @@ SELECT _part_offset, foo FROM t_1 where granule == 0 AND _part_offset >= 100000 SELECT 'PREWHERE'; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere granule == 0 where _part_offset >= 100000; -SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10, 16 } -SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; -- { serverError 10, 16 } +SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; +SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; SELECT _part_offset FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; SELECT _part_offset, foo FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; diff --git a/tests/queries/0_stateless/02265_test_dns_profile_events.sh b/tests/queries/0_stateless/02265_test_dns_profile_events.sh index 756a761a0ae..50fa6ba2cda 100755 --- a/tests/queries/0_stateless/02265_test_dns_profile_events.sh +++ b/tests/queries/0_stateless/02265_test_dns_profile_events.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel +# Tags: no-parallel, no-fasttest CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02294_system_certificates.reference b/tests/queries/0_stateless/02294_system_certificates.reference index 4655f996c40..432df9110a2 100644 --- a/tests/queries/0_stateless/02294_system_certificates.reference +++ b/tests/queries/0_stateless/02294_system_certificates.reference @@ -1,10 +1,10 @@ -version Int32 -serial_number Nullable(String) -signature_algo Nullable(String) -issuer Nullable(String) -not_before Nullable(String) -not_after Nullable(String) -subject Nullable(String) -pkey_algo Nullable(String) -path String -default UInt8 +version Int32 Version of the certificate. Values are 0 for v1, 1 for v2, 2 for v3. +serial_number Nullable(String) Serial Number of the certificate assigned by the issuer. +signature_algo Nullable(String) Signature Algorithm - an algorithm used by the issuer to sign this certificate. +issuer Nullable(String) Issuer - an unique identifier for the Certificate Authority issuing this certificate. +not_before Nullable(String) The beginning of the time window when this certificate is valid. +not_after Nullable(String) The end of the time window when this certificate is valid. +subject Nullable(String) Subject - identifies the owner of the public key. +pkey_algo Nullable(String) Public Key Algorithm defines the algorithm the public key can be used with. +path String Path to the file or directory containing this certificate. +default UInt8 Certificate is in the default certificate location. 
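Editor's note: the tests/ci/style_check.py hunk above replaces one monolithic docker invocation with two commands fanned out over a process pool. Below is a minimal, self-contained sketch of that pattern; the command strings and worker count are illustrative placeholders, not the real check scripts.

```python
#!/usr/bin/env python3
"""Sketch of the fan-out used in style_check.py: run two shell commands
in parallel and fail if either one fails. Commands are placeholders."""
import subprocess
from concurrent.futures import ProcessPoolExecutor


def run(cmd: str) -> int:
    # shell=True mirrors the diff; a returncode of 0 means success
    return subprocess.run(cmd, shell=True).returncode


if __name__ == "__main__":
    commands = ["echo ./check_cpp_docs.sh", "echo ./check_py.sh"]  # placeholders
    with ProcessPoolExecutor(max_workers=2) as executor:
        futures = [executor.submit(run, cmd) for cmd in commands]
        # .result() blocks until each worker finishes and re-raises its exceptions
        if any(f.result() for f in futures):
            raise SystemExit(1)
```

Unlike subprocess.check_call in the old code path, subprocess.run does not raise on a non-zero exit status, so this sketch checks the return codes explicitly; the diff itself joins both futures before running the result-processing step.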
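Editor's note: the tests/ci/ci_config.py hunk above declares `class WorkFlows(metaclass=WithIter)`. The `WithIter` helper from ci_utils is not shown in this diff; the sketch below is an assumed minimal implementation of such a metaclass, for illustration only, showing how it can make the class itself iterable over its public attribute values.

```python
class WithIter(type):
    """Assumed sketch (not the actual ci_utils implementation):
    iterate a class over its public, non-callable class attributes."""

    def __iter__(cls):
        # Skip dunder/private names and anything callable (methods)
        return iter(
            value
            for name, value in vars(cls).items()
            if not name.startswith("_") and not callable(value)
        )


class WorkFlows(metaclass=WithIter):
    PULL_REQUEST = "PULL_REQUEST"
    MASTER = "MASTER"
    BACKPORT = "BACKPORT"
    RELEASE = "RELEASE"
    SYNC = "SYNC"


print(list(WorkFlows))  # ['PULL_REQUEST', 'MASTER', 'BACKPORT', 'RELEASE', 'SYNC']
```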
diff --git a/tests/queries/0_stateless/02326_settings_changes_system_table.reference b/tests/queries/0_stateless/02326_settings_changes_system_table.reference
index 1c8c4fa1880..946b2727d30 100644
--- a/tests/queries/0_stateless/02326_settings_changes_system_table.reference
+++ b/tests/queries/0_stateless/02326_settings_changes_system_table.reference
@@ -1,3 +1,3 @@
-version String
-changes Array(Tuple(\n    name String,\n    previous_value String,\n    new_value String,\n    reason String))
+version String The ClickHouse server version.
+changes Array(Tuple(\n    name String,\n    previous_value String,\n    new_value String,\n    reason String)) The list of changes in settings which changed the behaviour of ClickHouse.
 22.5 [('memory_overcommit_ratio_denominator','0','1073741824','Enable memory overcommit feature by default'),('memory_overcommit_ratio_denominator_for_user','0','1073741824','Enable memory overcommit feature by default')]
diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.sql b/tests/queries/0_stateless/02366_kql_mvexpand.sql
index e7798609646..af336a19638 100644
--- a/tests/queries/0_stateless/02366_kql_mvexpand.sql
+++ b/tests/queries/0_stateless/02366_kql_mvexpand.sql
@@ -33,3 +33,7 @@ print '-- mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bo
 mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool);
 print '-- mv_expand_test_table | mv-expand c to typeof(bool) --';
 mv_expand_test_table | mv-expand c to typeof(bool);
+SET max_query_size = 28;
+SET dialect='kusto';
+mv_expand_test_table | mv-expand c, d; -- { serverError SYNTAX_ERROR }
+SET max_query_size=262144;
diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
index 379eea4dbbb..cd776611857 100644
--- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
+++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
@@ -62,6 +62,7 @@ __bitBoolMaskOr
 __bitSwapLastTwo
 __bitWrapperFunc
 __getScalar
+__scalarSubqueryResult
 abs
 accurateCast
 accurateCastOrDefault
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference
index 1af3ee244f1..bd3b689ca3c 100644
--- a/tests/queries/0_stateless/02447_drop_database_replica.reference
+++ b/tests/queries/0_stateless/02447_drop_database_replica.reference
@@ -13,6 +13,7 @@ t
 rdb_default 1 1 s1 r1 1
 2
 2
+2
 s1 r1 OK 2 0
 s1 r2 QUEUED 2 0
 s2 r1 QUEUED 2 0
@@ -24,4 +25,5 @@ rdb_default 1 2 s1 r2 0
 t
 t2
 t3
+t4
 rdb_default_4 1 1 s1 r1 1
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh
index fb89db5045b..5c4604bc8cd 100755
--- a/tests/queries/0_stateless/02447_drop_database_replica.sh
+++ b/tests/queries/0_stateless/02447_drop_database_replica.sh
@@ -33,8 +33,9 @@ $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_na
 $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it"
 
 # Also check that it doesn't exceed distributed_ddl_task_timeout waiting for inactive replicas
-timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t2 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED"
-timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t3 (n int) engine=Log" | sort
+timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=none_only_active -q "create table $db.t2 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED"
+timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED"
+timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t4 (n int) engine=Log" | sort
 
 $CLICKHOUSE_CLIENT -q "detach database $db3"
 $CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db"
diff --git a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference b/tests/queries/0_stateless/02534_join_prewhere_bug.reference
similarity index 88%
rename from tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference
rename to tests/queries/0_stateless/02534_join_prewhere_bug.reference
index aaef17371d8..115ea994de1 100644
--- a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference
+++ b/tests/queries/0_stateless/02534_join_prewhere_bug.reference
@@ -34,5 +34,12 @@ ORDER BY test2.col1
 ;
 5600 123 123
 5601 321 -32
+SELECT col2, col2 + 1 FROM test1
+FULL OUTER JOIN test2 USING (col1)
+PREWHERE (col2 * 2) :: UInt8
+;
+123 124
+-32 -31
+-30 -29
 DROP TABLE IF EXISTS test1;
 DROP TABLE IF EXISTS test2;
diff --git a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql b/tests/queries/0_stateless/02534_join_prewhere_bug.sql
similarity index 92%
rename from tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql
rename to tests/queries/0_stateless/02534_join_prewhere_bug.sql
index 073f81e4ff3..016c92597ec 100644
--- a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql
+++ b/tests/queries/0_stateless/02534_join_prewhere_bug.sql
@@ -42,5 +42,10 @@ WHERE test2.col1 IS NOT NULL
 ORDER BY test2.col1
 ;
 
+SELECT col2, col2 + 1 FROM test1
+FULL OUTER JOIN test2 USING (col1)
+PREWHERE (col2 * 2) :: UInt8
+;
+
 DROP TABLE IF EXISTS test1;
 DROP TABLE IF EXISTS test2;
diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference
index 63658890119..fca48238778 100644
--- a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference
+++ b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_query_tree_rewrite.reference
@@ -24,21 +24,21 @@ QUERY id: 0
 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 13, nodes: 2
-COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String
-FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8
+COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String
+FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 17, nodes: 2
-FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 16, nodes: 2
+FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 19, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 23, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 22, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM date_t
@@ -66,21 +66,21 @@ QUERY id: 0
 FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 13, nodes: 2
-COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String
-FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8
+COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String
+FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 17, nodes: 2
-FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 16, nodes: 2
+FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 19, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 23, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 22, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM date_t
@@ -244,21 +244,21 @@ QUERY id: 0
 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 13, nodes: 2
-COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 15, constant_value: \'1998-01-01\', constant_value_type: String
-FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8
+COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 14, constant_value: \'1998-01-01\', constant_value_type: String
+FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 17, nodes: 2
-FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 16, nodes: 2
+FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 19, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 23, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 22, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM date_t
@@ -289,34 +289,34 @@ QUERY id: 0
 FUNCTION id: 14, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 15, nodes: 2
-COLUMN id: 16, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 17, constant_value: \'1994-01-01\', constant_value_type: String
-FUNCTION id: 18, function_name: and, function_type: ordinary, result_type: UInt8
+COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 16, constant_value: \'1994-01-01\', constant_value_type: String
+FUNCTION id: 17, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 19, nodes: 2
-FUNCTION id: 20, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+FUNCTION id: 19, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 21, nodes: 2
-COLUMN id: 22, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 23, constant_value: \'1994-01-01\', constant_value_type: String
-FUNCTION id: 24, function_name: less, function_type: ordinary, result_type: UInt8
+LIST id: 20, nodes: 2
+COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 21, constant_value: \'1994-01-01\', constant_value_type: String
+FUNCTION id: 22, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 25, nodes: 2
-COLUMN id: 26, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 27, constant_value: \'1995-01-01\', constant_value_type: String
-FUNCTION id: 28, function_name: and, function_type: ordinary, result_type: UInt8
+LIST id: 23, nodes: 2
+COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 24, constant_value: \'1995-01-01\', constant_value_type: String
+FUNCTION id: 25, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 29, nodes: 2
-FUNCTION id: 30, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 26, nodes: 2
+FUNCTION id: 27, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 31, nodes: 2
-COLUMN id: 32, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 33, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 34, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 28, nodes: 2
+COLUMN id: 29, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 30, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 31, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 35, nodes: 2
-COLUMN id: 32, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 36, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 32, nodes: 2
+COLUMN id: 29, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 33, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT
 value1,
@@ -346,26 +346,26 @@ QUERY id: 0
 FUNCTION id: 11, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 12, nodes: 2
-COLUMN id: 13, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 14, constant_value: \'1993-01-01\', constant_value_type: String
-FUNCTION id: 15, function_name: less, function_type: ordinary, result_type: UInt8
+COLUMN id: 6, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 13, constant_value: \'1993-01-01\', constant_value_type: String
+FUNCTION id: 14, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 16, nodes: 2
-COLUMN id: 17, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 18, constant_value: \'1994-01-01\', constant_value_type: String
-FUNCTION id: 19, function_name: and, function_type: ordinary, result_type: UInt8
+LIST id: 15, nodes: 2
+COLUMN id: 6, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 16, constant_value: \'1994-01-01\', constant_value_type: String
+FUNCTION id: 17, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 20, nodes: 2
-FUNCTION id: 21, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+FUNCTION id: 19, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 22, nodes: 2
-COLUMN id: 23, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 24, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 25, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 20, nodes: 2
+COLUMN id: 21, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 22, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 23, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 26, nodes: 2
-COLUMN id: 23, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 27, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 24, nodes: 2
+COLUMN id: 21, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 25, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM date_t
@@ -425,22 +425,22 @@ QUERY id: 0
 FUNCTION id: 10, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 11, nodes: 2
-COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 13, constant_value: \'1994-01-01\', constant_value_type: String
+COLUMN id: 8, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 12, constant_value: \'1994-01-01\', constant_value_type: String
 WHERE
-FUNCTION id: 14, function_name: and, function_type: ordinary, result_type: UInt8
+FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 15, nodes: 2
-FUNCTION id: 16, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 14, nodes: 2
+FUNCTION id: 15, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 17, nodes: 2
-COLUMN id: 18, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 19, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 20, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 16, nodes: 2
+COLUMN id: 17, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 18, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 19, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 21, nodes: 2
-COLUMN id: 18, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 22, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 20, nodes: 2
+COLUMN id: 17, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM date_t
@@ -479,8 +479,8 @@ QUERY id: 0
 FUNCTION id: 19, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 20, nodes: 2
-COLUMN id: 21, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 22, constant_value: \'1994-01-01\', constant_value_type: String
+COLUMN id: 17, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 21, constant_value: \'1994-01-01\', constant_value_type: String
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM date_t
@@ -582,21 +582,21 @@ QUERY id: 0
 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 13, nodes: 2
-COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String
-FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8
+COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String
+FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 17, nodes: 2
-FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 16, nodes: 2
+FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 19, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 23, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 22, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM date_t
@@ -624,21 +624,21 @@ QUERY id: 0
 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 13, nodes: 2
-COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 15, constant_value: \'1992-04-01\', constant_value_type: String
-FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8
+COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 14, constant_value: \'1992-04-01\', constant_value_type: String
+FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 17, nodes: 2
-FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 16, nodes: 2
+FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 19, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 23, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 22, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM date_t
@@ -666,21 +666,21 @@ QUERY id: 0
 FUNCTION id: 12, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 13, nodes: 2
-COLUMN id: 14, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 15, constant_value: \'1992-04-01\', constant_value_type: String
-FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8
+COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 14, constant_value: \'1992-04-01\', constant_value_type: String
+FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 17, nodes: 2
-FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 16, nodes: 2
+FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 19, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 23, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 22, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM date_t
@@ -847,26 +847,26 @@ QUERY id: 0
 FUNCTION id: 14, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 15, nodes: 2
-COLUMN id: 16, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 17, constant_value: \'1993-01-01\', constant_value_type: String
-FUNCTION id: 18, function_name: less, function_type: ordinary, result_type: UInt8
+COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 16, constant_value: \'1993-01-01\', constant_value_type: String
+FUNCTION id: 17, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 19, nodes: 2
-COLUMN id: 20, column_name: date1, result_type: Date, source_id: 3
-CONSTANT id: 21, constant_value: \'1994-01-01\', constant_value_type: String
-FUNCTION id: 22, function_name: and, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+COLUMN id: 10, column_name: date1, result_type: Date, source_id: 3
+CONSTANT id: 19, constant_value: \'1994-01-01\', constant_value_type: String
+FUNCTION id: 20, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 23, nodes: 2
-FUNCTION id: 24, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 21, nodes: 2
+FUNCTION id: 22, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 25, nodes: 2
-COLUMN id: 26, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 27, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 28, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 23, nodes: 2
+COLUMN id: 24, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 25, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 26, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 29, nodes: 2
-COLUMN id: 26, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 30, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 27, nodes: 2
+COLUMN id: 24, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 28, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM datetime_t
@@ -894,21 +894,21 @@ QUERY id: 0
 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 13, nodes: 2
-COLUMN id: 14, column_name: date1, result_type: DateTime, source_id: 3
-CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String
-FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8
+COLUMN id: 10, column_name: date1, result_type: DateTime, source_id: 3
+CONSTANT id: 14, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String
+FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 17, nodes: 2
-FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 16, nodes: 2
+FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 19, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 23, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 22, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM datetime_t
@@ -936,21 +936,21 @@ QUERY id: 0
 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 13, nodes: 2
-COLUMN id: 14, column_name: date1, result_type: DateTime, source_id: 3
-CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String
-FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8
+COLUMN id: 10, column_name: date1, result_type: DateTime, source_id: 3
+CONSTANT id: 14, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String
+FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 17, nodes: 2
-FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 16, nodes: 2
+FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 19, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 23, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 22, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM date32_t
@@ -978,21 +978,21 @@ QUERY id: 0
 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 13, nodes: 2
-COLUMN id: 14, column_name: date1, result_type: Date32, source_id: 3
-CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String
-FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8
+COLUMN id: 10, column_name: date1, result_type: Date32, source_id: 3
+CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String
+FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 17, nodes: 2
-FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 16, nodes: 2
+FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 19, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 23, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 22, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM date32_t
@@ -1020,21 +1020,21 @@ QUERY id: 0
 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 13, nodes: 2
-COLUMN id: 14, column_name: date1, result_type: Date32, source_id: 3
-CONSTANT id: 15, constant_value: \'1994-01-01\', constant_value_type: String
-FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8
+COLUMN id: 10, column_name: date1, result_type: Date32, source_id: 3
+CONSTANT id: 14, constant_value: \'1994-01-01\', constant_value_type: String
+FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 17, nodes: 2
-FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 16, nodes: 2
+FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 19, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 23, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 22, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM datetime64_t
@@ -1062,21 +1062,21 @@ QUERY id: 0
 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 13, nodes: 2
-COLUMN id: 14, column_name: date1, result_type: DateTime64(3), source_id: 3
-CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String
-FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8
+COLUMN id: 10, column_name: date1, result_type: DateTime64(3), source_id: 3
+CONSTANT id: 14, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String
+FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 17, nodes: 2
-FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 16, nodes: 2
+FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 19, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 23, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 22, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
 SELECT value1
 FROM datetime64_t
@@ -1104,19 +1104,19 @@ QUERY id: 0
 FUNCTION id: 12, function_name: less, function_type: ordinary, result_type: UInt8
 ARGUMENTS
 LIST id: 13, nodes: 2
-COLUMN id: 14, column_name: date1, result_type: DateTime64(3), source_id: 3
-CONSTANT id: 15, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String
-FUNCTION id: 16, function_name: and, function_type: ordinary, result_type: UInt8
+COLUMN id: 10, column_name: date1, result_type: DateTime64(3), source_id: 3
+CONSTANT id: 14, constant_value: \'1994-01-01 00:00:00\', constant_value_type: String
+FUNCTION id: 15, function_name: and, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 17, nodes: 2
-FUNCTION id: 18, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 16, nodes: 2
+FUNCTION id: 17, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 19, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 21, constant_value: UInt64_1, constant_value_type: UInt8
-FUNCTION id: 22, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
+LIST id: 18, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 20, constant_value: UInt64_1, constant_value_type: UInt8
+FUNCTION id: 21, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8
 ARGUMENTS
-LIST id: 23, nodes: 2
-COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3
-CONSTANT id: 24, constant_value: UInt64_3, constant_value_type: UInt8
+LIST id: 22, nodes: 2
+COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3
+CONSTANT id: 23, constant_value: UInt64_3, constant_value_type: UInt8
 SETTINGS allow_experimental_analyzer=1
diff --git a/tests/queries/0_stateless/02813_seriesDecomposeSTL.sql b/tests/queries/0_stateless/02813_seriesDecomposeSTL.sql
index 929d0474e09..496267f2476 100644
--- a/tests/queries/0_stateless/02813_seriesDecomposeSTL.sql
+++ b/tests/queries/0_stateless/02813_seriesDecomposeSTL.sql
@@ -1,6 +1,3 @@
--- Tags: no-cpu-aarch64
--- Tag no-cpu-aarch64: values generated are slighly different on aarch64
-
 DROP TABLE IF EXISTS tb2;
 CREATE TABLE tb2 (`period` UInt32, `ts` Array(Float64)) ENGINE = Memory;
diff --git a/tests/queries/0_stateless/02890_describe_table_options.reference b/tests/queries/0_stateless/02890_describe_table_options.reference
index 5d99df36bb4..ff58202ae49 100644
--- a/tests/queries/0_stateless/02890_describe_table_options.reference
+++ b/tests/queries/0_stateless/02890_describe_table_options.reference
@@ -2,237 +2,197 @@
 SET describe_compact_output = 0, describe_include_virtual_columns = 0, describe_include_subcolumns = 0;
 DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes;
-┌─name─┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┐
-│ id   │ UInt64                           │              │                    │ index column │                  │                │
-│ arr  │ Array(UInt64)                    │ DEFAULT      │ [10, 20]           │              │ ZSTD(1)          │                │
-│ t    │ Tuple(
- a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ -└──────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘ +┌─name─┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┐ +│ id │ UInt64 │ │ │ index column │ │ │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ +└──────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name─┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┐ -│ id │ UInt64 │ │ │ index column │ │ │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ -└──────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘ +┌─name─┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┐ +│ id │ UInt64 │ │ │ index column │ │ │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ +└──────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┘ SET describe_compact_output = 0, describe_include_virtual_columns = 0, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name──────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐ -│ id │ UInt64 │ │ │ index column │ │ │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ -│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ -│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ -└───────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘ +┌─name──────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐ +│ id │ UInt64 │ │ │ index column │ │ │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ +│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ +│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ +│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ +└───────────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name──────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐ -│ id │ UInt64 │ │ │ index column │ │ │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ -│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ -│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 
-└───────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘ +┌─name──────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐ +│ id │ UInt64 │ │ │ index column │ │ │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ +│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ +│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ +│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ +└───────────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘ SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 0; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─────────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┐ -│ id │ UInt64 │ │ │ index column │ │ │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ _part │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _part_index │ UInt64 │ │ │ │ │ │ 1 │ -│ _part_uuid │ UUID │ │ │ │ │ │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _partition_value │ UInt8 │ │ │ │ │ │ 1 │ -│ _sample_factor │ Float64 │ │ │ │ │ │ 1 │ -│ _part_offset │ UInt64 │ │ │ │ │ │ 1 │ -│ _row_exists │ UInt8 │ │ │ │ │ │ 1 │ -│ _block_number │ UInt64 │ │ │ │ │ │ 1 │ -└──────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┐ +│ id │ UInt64 │ │ │ index column │ │ │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 1 │ +└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┐ -│ id │ UInt64 │ │ │ index column │ │ │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ -│ t │ Tuple( - a String, - b 
UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ _table │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _part │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _part_index │ UInt64 │ │ │ │ │ │ 1 │ -│ _part_uuid │ UUID │ │ │ │ │ │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ │ │ │ 1 │ -│ _sample_factor │ Float64 │ │ │ │ │ │ 1 │ -│ _part_offset │ UInt64 │ │ │ │ │ │ 1 │ -│ _row_exists │ UInt8 │ │ │ │ │ │ 1 │ -│ _block_number │ UInt64 │ │ │ │ │ │ 1 │ -│ _shard_num │ UInt32 │ │ │ │ │ │ 1 │ -└────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┐ +│ id │ UInt64 │ │ │ index column │ │ │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 1 │ +│ _shard_num │ UInt32 │ │ │ Deprecated. 
Use function shardNum instead │ │ │ 1 │ +└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴────────────┘ SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─────────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┐ -│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ -│ _part │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _part_index │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _part_uuid │ UUID │ │ │ │ │ │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _partition_value │ UInt8 │ │ │ │ │ │ 0 │ 1 │ -│ _sample_factor │ Float64 │ │ │ │ │ │ 0 │ 1 │ -│ _part_offset │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _row_exists │ UInt8 │ │ │ │ │ │ 0 │ 1 │ -│ _block_number │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ -│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -└──────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┐ +│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 0 │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 0 │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 0 │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 0 │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 0 │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 0 │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ +│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type─────────────────────────────┬─default_type─┬─default_expression─┬─comment──────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┐ -│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 
│ 0 │ -│ _table │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _part │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _part_index │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _part_uuid │ UUID │ │ │ │ │ │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ -│ _sample_factor │ Float64 │ │ │ │ │ │ 0 │ 1 │ -│ _part_offset │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _row_exists │ UInt8 │ │ │ │ │ │ 0 │ 1 │ -│ _block_number │ UInt64 │ │ │ │ │ │ 0 │ 1 │ -│ _shard_num │ UInt32 │ │ │ │ │ │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ -│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -└────────────────┴──────────────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┐ +│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 0 │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 0 │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 0 │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 0 │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 0 │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 0 │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 0 │ 1 │ +│ _shard_num │ UInt32 │ │ │ Deprecated. 
Use function shardNum instead │ │ │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ +│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 0, describe_include_subcolumns = 0; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─┬─type─────────────────────────────┐ -│ id │ UInt64 │ -│ arr │ Array(UInt64) │ -│ t │ Tuple( - a String, - b UInt64) │ -└──────┴──────────────────────────────────┘ +┌─name─┬─type──────────────────────┐ +│ id │ UInt64 │ +│ arr │ Array(UInt64) │ +│ t │ Tuple(a String, b UInt64) │ +└──────┴───────────────────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name─┬─type─────────────────────────────┐ -│ id │ UInt64 │ -│ arr │ Array(UInt64) │ -│ t │ Tuple( - a String, - b UInt64) │ -└──────┴──────────────────────────────────┘ +┌─name─┬─type──────────────────────┐ +│ id │ UInt64 │ +│ arr │ Array(UInt64) │ +│ t │ Tuple(a String, b UInt64) │ +└──────┴───────────────────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 0, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name──────┬─type─────────────────────────────┬─is_subcolumn─┐ -│ id │ UInt64 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ -│ arr.size0 │ UInt64 │ 1 │ -│ t.a │ String │ 1 │ -│ t.b │ UInt64 │ 1 │ -└───────────┴──────────────────────────────────┴──────────────┘ +┌─name──────┬─type──────────────────────┬─is_subcolumn─┐ +│ id │ UInt64 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ +│ arr.size0 │ UInt64 │ 1 │ +│ t.a │ String │ 1 │ +│ t.b │ UInt64 │ 1 │ +└───────────┴───────────────────────────┴──────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name──────┬─type─────────────────────────────┬─is_subcolumn─┐ -│ id │ UInt64 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ -│ arr.size0 │ UInt64 │ 1 │ -│ t.a │ String │ 1 │ -│ t.b │ UInt64 │ 1 │ -└───────────┴──────────────────────────────────┴──────────────┘ +┌─name──────┬─type──────────────────────┬─is_subcolumn─┐ +│ id │ UInt64 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ +│ arr.size0 │ UInt64 │ 1 │ +│ t.a │ String │ 1 │ +│ t.b │ UInt64 │ 1 │ +└───────────┴───────────────────────────┴──────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 0; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─────────────┬─type─────────────────────────────┬─is_virtual─┐ -│ id │ UInt64 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ -│ _part │ LowCardinality(String) │ 1 │ -│ _part_index │ UInt64 │ 1 │ -│ _part_uuid │ UUID │ 1 │ -│ _partition_id │ LowCardinality(String) │ 1 │ -│ _partition_value │ UInt8 │ 1 │ -│ _sample_factor │ Float64 │ 1 │ -│ _part_offset │ UInt64 │ 1 │ -│ _row_exists │ UInt8 │ 1 │ -│ _block_number │ UInt64 │ 1 │ -└──────────────────┴──────────────────────────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─is_virtual─┐ +│ id │ UInt64 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ 
+│ t │ Tuple(a String, b UInt64) │ 0 │ +│ _part │ LowCardinality(String) │ 1 │ +│ _part_index │ UInt64 │ 1 │ +│ _part_uuid │ UUID │ 1 │ +│ _partition_id │ LowCardinality(String) │ 1 │ +│ _sample_factor │ Float64 │ 1 │ +│ _part_offset │ UInt64 │ 1 │ +│ _row_exists │ UInt8 │ 1 │ +│ _block_number │ UInt64 │ 1 │ +└────────────────┴───────────────────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type─────────────────────────────┬─is_virtual─┐ -│ id │ UInt64 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ -│ _table │ LowCardinality(String) │ 1 │ -│ _part │ LowCardinality(String) │ 1 │ -│ _part_index │ UInt64 │ 1 │ -│ _part_uuid │ UUID │ 1 │ -│ _partition_id │ LowCardinality(String) │ 1 │ -│ _sample_factor │ Float64 │ 1 │ -│ _part_offset │ UInt64 │ 1 │ -│ _row_exists │ UInt8 │ 1 │ -│ _block_number │ UInt64 │ 1 │ -│ _shard_num │ UInt32 │ 1 │ -└────────────────┴──────────────────────────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─is_virtual─┐ +│ id │ UInt64 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ +│ _part │ LowCardinality(String) │ 1 │ +│ _part_index │ UInt64 │ 1 │ +│ _part_uuid │ UUID │ 1 │ +│ _partition_id │ LowCardinality(String) │ 1 │ +│ _sample_factor │ Float64 │ 1 │ +│ _part_offset │ UInt64 │ 1 │ +│ _row_exists │ UInt8 │ 1 │ +│ _block_number │ UInt64 │ 1 │ +│ _shard_num │ UInt32 │ 1 │ +└────────────────┴───────────────────────────┴────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name─────────────┬─type─────────────────────────────┬─is_subcolumn─┬─is_virtual─┐ -│ id │ UInt64 │ 0 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ 0 │ -│ _part │ LowCardinality(String) │ 0 │ 1 │ -│ _part_index │ UInt64 │ 0 │ 1 │ -│ _part_uuid │ UUID │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ -│ _partition_value │ UInt8 │ 0 │ 1 │ -│ _sample_factor │ Float64 │ 0 │ 1 │ -│ _part_offset │ UInt64 │ 0 │ 1 │ -│ _row_exists │ UInt8 │ 0 │ 1 │ -│ _block_number │ UInt64 │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ 1 │ 0 │ -│ t.a │ String │ 1 │ 0 │ -│ t.b │ UInt64 │ 1 │ 0 │ -└──────────────────┴──────────────────────────────────┴──────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─is_subcolumn─┬─is_virtual─┐ +│ id │ UInt64 │ 0 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ 0 │ 1 │ +│ _part_index │ UInt64 │ 0 │ 1 │ +│ _part_uuid │ UUID │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ +│ _sample_factor │ Float64 │ 0 │ 1 │ +│ _part_offset │ UInt64 │ 0 │ 1 │ +│ _row_exists │ UInt8 │ 0 │ 1 │ +│ _block_number │ UInt64 │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ 1 │ 0 │ +│ t.a │ String │ 1 │ 0 │ +│ t.b │ UInt64 │ 1 │ 0 │ +└────────────────┴───────────────────────────┴──────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type─────────────────────────────┬─is_subcolumn─┬─is_virtual─┐ -│ id │ UInt64 │ 0 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ 0 │ -│ t │ Tuple( - a String, - b UInt64) │ 0 │ 0 │ -│ _table │ LowCardinality(String) │ 0 │ 1 │ -│ _part │ LowCardinality(String) │ 0 │ 1 │ -│ _part_index │ UInt64 │ 0 │ 1 │ -│ _part_uuid │ UUID │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ -│ 
_sample_factor │ Float64 │ 0 │ 1 │ -│ _part_offset │ UInt64 │ 0 │ 1 │ -│ _row_exists │ UInt8 │ 0 │ 1 │ -│ _block_number │ UInt64 │ 0 │ 1 │ -│ _shard_num │ UInt32 │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ 1 │ 0 │ -│ t.a │ String │ 1 │ 0 │ -│ t.b │ UInt64 │ 1 │ 0 │ -└────────────────┴──────────────────────────────────┴──────────────┴────────────┘ +┌─name───────────┬─type──────────────────────┬─is_subcolumn─┬─is_virtual─┐ +│ id │ UInt64 │ 0 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ 0 │ 1 │ +│ _part_index │ UInt64 │ 0 │ 1 │ +│ _part_uuid │ UUID │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ +│ _sample_factor │ Float64 │ 0 │ 1 │ +│ _part_offset │ UInt64 │ 0 │ 1 │ +│ _row_exists │ UInt8 │ 0 │ 1 │ +│ _block_number │ UInt64 │ 0 │ 1 │ +│ _shard_num │ UInt32 │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ 1 │ 0 │ +│ t.a │ String │ 1 │ 0 │ +│ t.b │ UInt64 │ 1 │ 0 │ +└────────────────┴───────────────────────────┴──────────────┴────────────┘ diff --git a/tests/queries/0_stateless/02890_describe_table_options.sql b/tests/queries/0_stateless/02890_describe_table_options.sql index 236100148ff..63806c7ee3d 100644 --- a/tests/queries/0_stateless/02890_describe_table_options.sql +++ b/tests/queries/0_stateless/02890_describe_table_options.sql @@ -1,5 +1,7 @@ DROP TABLE IF EXISTS t_describe_options; +SET print_pretty_type_names = 0; + CREATE TABLE t_describe_options ( id UInt64 COMMENT 'index column', arr Array(UInt64) DEFAULT [10, 20] CODEC(ZSTD), diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference index af0e50ec332..f1ca07ef408 100644 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference @@ -1,5 +1,5 @@ Creating 300 tables -Making making 200 requests to system.replicas +Making 200 requests to system.replicas Query system.replicas while waiting for other concurrent requests to finish 0 900 diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh index f93175529c0..d3eed891ab9 100755 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh @@ -33,7 +33,7 @@ done wait; -echo "Making making $CONCURRENCY requests to system.replicas" +echo "Making $CONCURRENCY requests to system.replicas" for i in `seq 1 $CONCURRENCY`; do diff --git a/tests/queries/0_stateless/02932_refreshable_materialized_views.reference b/tests/queries/0_stateless/02932_refreshable_materialized_views.reference index b52d0847ff9..aa76806da9d 100644 --- a/tests/queries/0_stateless/02932_refreshable_materialized_views.reference +++ b/tests/queries/0_stateless/02932_refreshable_materialized_views.reference @@ -1,8 +1,8 @@ <1: created view> a [] 1 -CREATE MATERIALIZED VIEW default.a\nREFRESH AFTER 1 SECOND\n(\n `x` UInt64\n)\nENGINE = Memory\nAS SELECT number AS x\nFROM numbers(2)\nUNION ALL\nSELECT rand64() AS x +CREATE MATERIALIZED VIEW default.a\nREFRESH AFTER 2 SECOND\n(\n `x` UInt64\n)\nENGINE = Memory\nAS SELECT number AS x\nFROM numbers(2)\nUNION ALL\nSELECT rand64() AS x <2: refreshed> 3 1 1 -<3: time difference at least> 500 -<4: next refresh in> 1 +<3: time difference at least> 1000 +<4: next refresh in> 2 <4.5: altered> Scheduled Finished 2052-01-01 00:00:00 CREATE 
MATERIALIZED VIEW default.a\nREFRESH EVERY 2 YEAR\n(\n `x` Int16\n)\nENGINE = Memory\nAS SELECT x * 2 AS x\nFROM default.src <5: no refresh> 3 diff --git a/tests/queries/0_stateless/02932_refreshable_materialized_views.sh b/tests/queries/0_stateless/02932_refreshable_materialized_views.sh index 8daea063fc5..89942e25b67 100755 --- a/tests/queries/0_stateless/02932_refreshable_materialized_views.sh +++ b/tests/queries/0_stateless/02932_refreshable_materialized_views.sh @@ -18,7 +18,7 @@ $CLICKHOUSE_CLIENT -nq "create view refreshes as select * from system.view_refre # Basic refreshing. $CLICKHOUSE_CLIENT -nq " create materialized view a - refresh after 1 second + refresh after 2 second engine Memory empty as select number as x from numbers(2) union all select rand64() as x" @@ -29,6 +29,7 @@ while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $ do sleep 0.1 done +start_time="`$CLICKHOUSE_CLIENT -nq "select reinterpret(now64(), 'Int64')"`" # Check table contents. $CLICKHOUSE_CLIENT -nq "select '<2: refreshed>', count(), sum(x=0), sum(x=1) from a" # Wait for table contents to change. @@ -39,7 +40,6 @@ do [ "$res2" == "$res1" ] || break sleep 0.1 done -time2="`$CLICKHOUSE_CLIENT -nq "select reinterpret(now64(), 'Int64')"`" # Wait for another change. while : do @@ -47,11 +47,11 @@ do [ "$res3" == "$res2" ] || break sleep 0.1 done -# Check that the two changes were at least 500ms apart, in particular that we're not refreshing +# Check that the two changes were at least 1 second apart, in particular that we're not refreshing # like crazy. This is potentially flaky, but we need at least one test that uses non-mocked timer # to make sure the clock+timer code works at all. If it turns out flaky, increase refresh period above. $CLICKHOUSE_CLIENT -nq " - select '<3: time difference at least>', min2(reinterpret(now64(), 'Int64') - $time2, 500); + select '<3: time difference at least>', min2(reinterpret(now64(), 'Int64') - $start_time, 1000); select '<4: next refresh in>', next_refresh_time-last_refresh_time from refreshes;" # Create a source table from which views will read. @@ -61,7 +61,7 @@ $CLICKHOUSE_CLIENT -nq " # Switch to fake clock, change refresh schedule, change query. 
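For readability, the timing check above boils down to millisecond arithmetic on reinterpreted DateTime64 values: now64() has millisecond precision by default, so reinterpreting it as Int64 yields milliseconds since epoch, and min2(..., 1000) caps the measured gap so the reference output stays stable once the two refreshes are at least one second apart. A minimal standalone sketch, in which 1700000000000 is a hypothetical stand-in for the $start_time the script captures:

SELECT reinterpret(now64(), 'Int64') AS ms_since_epoch;                      -- raw milliseconds since epoch
SELECT min2(reinterpret(now64(), 'Int64') - 1700000000000, 1000) AS gap_ms;  -- capped at 1000, as in the test

The doubled refresh period (1 second to 2) and the relocated start-time measurement both serve the goal stated in the comment: widen the margin so this one non-mocked-timer test is less flaky.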
$CLICKHOUSE_CLIENT -nq " system test view a set fake time '2050-01-01 00:00:01';" -while [ "`$CLICKHOUSE_CLIENT -nq "select status, last_refresh_time, next_refresh_time from refreshes -- $LINENO" | xargs`" != 'Scheduled 2050-01-01 00:00:01 2050-01-01 00:00:02' ] +while [ "`$CLICKHOUSE_CLIENT -nq "select status, last_refresh_time, next_refresh_time from refreshes -- $LINENO" | xargs`" != 'Scheduled 2050-01-01 00:00:01 2050-01-01 00:00:03' ] do sleep 0.1 done diff --git a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference index 4d33751c699..8ae3cdf8f3a 100644 --- a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference +++ b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.reference @@ -2,7 +2,12 @@ 2 test2 8 3 test3 8 4 test4 1985 +2 test2 8 +3 test3 8 +4 test4 1985 --- 1 test1 42 +1 test1 42 --- 3 test3 +3 test3 diff --git a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql index 53b8a761cda..ab6e1532299 100644 --- a/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql +++ b/tests/queries/0_stateless/02949_parallel_replicas_in_subquery.sql @@ -8,19 +8,23 @@ INSERT INTO merge_tree_in_subqueries VALUES(5, 'test5', 0); SET max_parallel_replicas=3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost', parallel_replicas_for_non_replicated_merge_tree=1; -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=2; -- { serverError SUPPORT_IS_DISABLED } +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=0; -- { serverError SUPPORT_IS_DISABLED } +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=1; SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 0) SETTINGS allow_experimental_parallel_reading_from_replicas=1; SELECT '---'; -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2; -- { serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=0; -- { serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=1; SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT * FROM system.numbers LIMIT 2, 3) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=1; SELECT '---'; -SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2; -- { serverError SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=0; -- { serverError 
SUPPORT_IS_DISABLED }; +SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=1; SELECT * FROM merge_tree_in_subqueries WHERE id IN (SELECT 1) ORDER BY id SETTINGS allow_experimental_parallel_reading_from_replicas=1; -- IN with tuples is allowed SELECT '---'; -SELECT id, name FROM merge_tree_in_subqueries WHERE (id, name) IN (3, 'test3') SETTINGS allow_experimental_parallel_reading_from_replicas=2; +SELECT id, name FROM merge_tree_in_subqueries WHERE (id, name) IN (3, 'test3') SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=0; +SELECT id, name FROM merge_tree_in_subqueries WHERE (id, name) IN (3, 'test3') SETTINGS allow_experimental_parallel_reading_from_replicas=2, parallel_replicas_allow_in_with_subquery=1; DROP TABLE IF EXISTS merge_tree_in_subqueries; diff --git a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference index 6b1fdfd42a2..100e4e500cd 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference +++ b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference @@ -237,7 +237,7 @@ sub2 as (select y, z from tab2 where y != 4), sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), sub4 as (select z, a from tab3 where z != 8), sub5 as (select z, a, x, y, r.y, ll.z from sub4 rr any right join sub3 ll on ll.z = rr.z) -select * from sub5 order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1;-- { echoOn } +select * from sub5 order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; Expression Sorting Expression @@ -250,6 +250,93 @@ Expression ReadFromRemoteParallelReplicas Expression ReadFromRemoteParallelReplicas +-- +-- Subqueries for IN allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +0 0 0 0 0 0 +1 1 0 0 0 0 +3 3 0 0 0 0 +4 4 0 0 0 0 +5 5 0 0 0 0 +6 6 6 6 0 0 +7 7 0 0 0 0 +8 8 8 8 0 0 +9 9 0 0 0 0 +10 10 10 10 0 0 +11 11 0 0 0 0 +12 12 12 12 12 12 +13 13 0 0 0 0 +14 14 14 14 0 0 +15 15 0 0 0 0 +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as 
z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +Expression + Sorting + Expression + ReadFromRemoteParallelReplicas +-- +-- Subqueries for IN are not allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; +0 0 0 0 0 0 +1 1 0 0 0 0 +3 3 0 0 0 0 +4 4 0 0 0 0 +5 5 0 0 0 0 +6 6 6 6 0 0 +7 7 0 0 0 0 +8 8 8 8 0 0 +9 9 0 0 0 0 +10 10 10 10 0 0 +11 11 0 0 0 0 +12 12 12 12 12 12 +13 13 0 0 0 0 +14 14 14 14 0 0 +15 15 0 0 0 0 +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0;-- { echoOn } +Expression + Sorting + Expression + Join + Expression + Join + Expression + CreatingSets + Expression + Expression + ReadFromMergeTree + CreatingSet + Expression + Filter + ReadFromSystemNumbers + Expression + ReadFromRemoteParallelReplicas + Expression + ReadFromRemoteParallelReplicas set parallel_replicas_prefer_local_join = 1; -- A query with only INNER/LEFT joins is fully send to replicas. JOIN is executed in GLOBAL mode. 
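Taken together, the 02949 and 02967 changes pin down one contract: with the analyzer and parallel replicas enabled, an IN over a subquery either runs through the parallel-replicas path or fails with SUPPORT_IS_DISABLED, controlled by the new parallel_replicas_allow_in_with_subquery setting. A condensed sketch of the two modes against the merge_tree_in_subqueries table from 02949 (the numbers(10) subquery is illustrative, not taken from the tests):

SELECT * FROM merge_tree_in_subqueries
WHERE id IN (SELECT number FROM numbers(10))
SETTINGS allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_allow_in_with_subquery = 1; -- executed with parallel replicas

SELECT * FROM merge_tree_in_subqueries
WHERE id IN (SELECT number FROM numbers(10))
SETTINGS allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_allow_in_with_subquery = 0; -- { serverError SUPPORT_IS_DISABLED }

Note that the 02949 diff keeps the failing case only when the setting is explicitly 0, which suggests the new behavior is enabled by default; that reading is inferred from the test changes rather than stated elsewhere in this patch.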
select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x, y from tab1 where x != 2) l any left join (select y, z from tab2 where y != 4) r on l.y = r.y) ll any left join (select z, a from tab3 where z != 8) rr on ll.z = rr.z order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; @@ -500,3 +587,90 @@ Expression ReadFromRemoteParallelReplicas Expression ReadFromRemoteParallelReplicas +-- +-- Subqueries for IN allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +0 0 0 0 0 0 +1 1 0 0 0 0 +3 3 0 0 0 0 +4 4 0 0 0 0 +5 5 0 0 0 0 +6 6 6 6 0 0 +7 7 0 0 0 0 +8 8 8 8 0 0 +9 9 0 0 0 0 +10 10 10 10 0 0 +11 11 0 0 0 0 +12 12 12 12 12 12 +13 13 0 0 0 0 +14 14 14 14 0 0 +15 15 0 0 0 0 +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +Expression + Sorting + Expression + ReadFromRemoteParallelReplicas +-- +-- Subqueries for IN are not allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; +0 0 0 0 0 0 +1 1 0 0 0 0 +3 3 0 0 0 0 +4 4 0 0 0 0 +5 5 0 0 0 0 +6 6 6 6 0 0 +7 7 0 0 0 0 +8 8 8 8 0 0 +9 9 0 0 0 0 +10 10 10 10 0 0 +11 11 0 0 0 0 +12 12 12 12 12 12 +13 13 0 0 0 0 +14 14 14 14 0 0 +15 15 0 0 0 0 +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 
as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; +Expression + Sorting + Expression + Join + Expression + Join + Expression + CreatingSets + Expression + Expression + ReadFromMergeTree + CreatingSet + Expression + Filter + ReadFromSystemNumbers + Expression + ReadFromRemoteParallelReplicas + Expression + ReadFromRemoteParallelReplicas diff --git a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 index 7d2766d52f8..54505b147a3 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 +++ b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 @@ -126,4 +126,42 @@ sub4 as (select z, a from tab3 where z != 8), sub5 as (select z, a, x, y, r.y, ll.z from sub4 rr any right join sub3 ll on ll.z = rr.z) select * from sub5 order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +-- +-- Subqueries for IN allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; + +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; + +-- +-- Subqueries for IN are not allowed +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left 
join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; + +explain description=0 +with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), +sub2 as (select y, z from tab2 where y != 4), +sub3 as (select l.x, l.y, r.y, r.z as z from sub1 l any left join sub2 r on l.y = r.y), +sub4 as (select z, a from tab3 where z != 8), +sub5 as (select x, y, r.y, z, rr.z, a from sub3 ll any left join sub4 rr on ll.z = rr.z) +select * from sub5 order by x +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1, parallel_replicas_allow_in_with_subquery=0; + {%- endfor %} diff --git a/tests/queries/0_stateless/02992_all_columns_should_have_comment.reference b/tests/queries/0_stateless/02992_all_columns_should_have_comment.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql b/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql new file mode 100644 index 00000000000..b984c12e285 --- /dev/null +++ b/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql @@ -0,0 +1,4 @@ +SYSTEM FLUSH LOGS; +SELECT 'Column ' || name || ' from table ' || concat(database, '.', table) || ' should have a comment' +FROM system.columns +WHERE (database = 'system') AND (comment = '') AND (table NOT ILIKE '%_log_%') AND (table NOT IN ('numbers', 'numbers_mt', 'one')) AND (default_kind != 'ALIAS'); diff --git a/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.reference b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.reference new file mode 100644 index 00000000000..ffd2f68990b --- /dev/null +++ b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.reference @@ -0,0 +1,71 @@ +-- { echoOn } +SET allow_experimental_analyzer = 1; +EXPLAIN QUERY TREE SELECT encrypt('aes-256-ofb', (SELECT 'qwerty'), '12345678901234567890123456789012'), encrypt('aes-256-ofb', (SELECT 'asdf'), '12345678901234567890123456789012'); +QUERY id: 0 + PROJECTION COLUMNS + encrypt(\'aes-256-ofb\', [HIDDEN id: 1], [HIDDEN id: 2]) Nullable(String) + encrypt(\'aes-256-ofb\', [HIDDEN id: 3], [HIDDEN id: 2]) Nullable(String) + PROJECTION + LIST id: 1, nodes: 2 + CONSTANT id: 2, constant_value: \'\\n&\', constant_value_type: Nullable(String) + EXPRESSION + FUNCTION id: 3, function_name: encrypt, function_type: ordinary, result_type: Nullable(String) + ARGUMENTS + LIST id: 4, nodes: 3 + CONSTANT id: 5, constant_value: \'aes-256-ofb\', constant_value_type: String + CONSTANT id: 6, constant_value: [HIDDEN id: 1], constant_value_type: Nullable(String) + CONSTANT id: 7, constant_value: [HIDDEN id: 2], constant_value_type: String + CONSTANT id: 8, constant_value: \'\', constant_value_type: Nullable(String) + EXPRESSION + FUNCTION id: 9, function_name: encrypt, function_type: ordinary, result_type: Nullable(String) + ARGUMENTS + LIST id: 10, nodes: 3 + CONSTANT id: 11, constant_value: \'aes-256-ofb\', constant_value_type: 
String + CONSTANT id: 12, constant_value: [HIDDEN id: 3], constant_value_type: Nullable(String) + CONSTANT id: 13, constant_value: [HIDDEN id: 2], constant_value_type: String + JOIN TREE + TABLE id: 14, alias: __table1, table_name: system.one +SET format_display_secrets_in_show_and_select = 1; +EXPLAIN QUERY TREE SELECT encrypt('aes-256-ofb', (SELECT 'qwerty'), '12345678901234567890123456789012'), encrypt('aes-256-ofb', (SELECT 'asdf'), '12345678901234567890123456789012'); +QUERY id: 0 + PROJECTION COLUMNS + encrypt(\'aes-256-ofb\', _subquery_1, \'12345678901234567890123456789012\') Nullable(String) + encrypt(\'aes-256-ofb\', _subquery_2, \'12345678901234567890123456789012\') Nullable(String) + PROJECTION + LIST id: 1, nodes: 2 + CONSTANT id: 2, constant_value: \'\\n&\', constant_value_type: Nullable(String) + EXPRESSION + FUNCTION id: 3, function_name: encrypt, function_type: ordinary, result_type: Nullable(String) + ARGUMENTS + LIST id: 4, nodes: 3 + CONSTANT id: 5, constant_value: \'aes-256-ofb\', constant_value_type: String + CONSTANT id: 6, constant_value: \'qwerty\', constant_value_type: Nullable(String) + EXPRESSION + QUERY id: 7, is_subquery: 1 + PROJECTION COLUMNS + \'qwerty\' String + PROJECTION + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: \'qwerty\', constant_value_type: String + JOIN TREE + TABLE id: 10, table_name: system.one + CONSTANT id: 11, constant_value: \'12345678901234567890123456789012\', constant_value_type: String + CONSTANT id: 12, constant_value: \'\', constant_value_type: Nullable(String) + EXPRESSION + FUNCTION id: 13, function_name: encrypt, function_type: ordinary, result_type: Nullable(String) + ARGUMENTS + LIST id: 14, nodes: 3 + CONSTANT id: 15, constant_value: \'aes-256-ofb\', constant_value_type: String + CONSTANT id: 16, constant_value: \'asdf\', constant_value_type: Nullable(String) + EXPRESSION + QUERY id: 17, is_subquery: 1 + PROJECTION COLUMNS + \'asdf\' String + PROJECTION + LIST id: 18, nodes: 1 + CONSTANT id: 19, constant_value: \'asdf\', constant_value_type: String + JOIN TREE + TABLE id: 20, table_name: system.one + CONSTANT id: 21, constant_value: \'12345678901234567890123456789012\', constant_value_type: String + JOIN TREE + TABLE id: 22, alias: __table1, table_name: system.one diff --git a/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql new file mode 100644 index 00000000000..f40b40b6c8c --- /dev/null +++ b/tests/queries/0_stateless/02998_analyzer_secret_args_tree_node.sql @@ -0,0 +1,12 @@ +-- Tags: no-fasttest +-- encrypt function doesn't exist in the fastest build + +-- { echoOn } +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE SELECT encrypt('aes-256-ofb', (SELECT 'qwerty'), '12345678901234567890123456789012'), encrypt('aes-256-ofb', (SELECT 'asdf'), '12345678901234567890123456789012'); + +SET format_display_secrets_in_show_and_select = 1; + +EXPLAIN QUERY TREE SELECT encrypt('aes-256-ofb', (SELECT 'qwerty'), '12345678901234567890123456789012'), encrypt('aes-256-ofb', (SELECT 'asdf'), '12345678901234567890123456789012'); +-- { echoOff } diff --git a/tests/queries/0_stateless/02998_to_milliseconds.reference b/tests/queries/0_stateless/02998_to_milliseconds.reference new file mode 100644 index 00000000000..05139c19d1d --- /dev/null +++ b/tests/queries/0_stateless/02998_to_milliseconds.reference @@ -0,0 +1,8 @@ +2023-04-21 10:20:30 0 0 +2023-04-21 10:20:30 0 0 +2023-04-21 10:20:30.123 123 123 +2023-04-21 10:20:30.123456 123 123 
+2023-04-21 10:20:30.123456789 123 123 +120 +2023-04-21 10:20:30 0 +2023-04-21 10:20:30 0 diff --git a/tests/queries/0_stateless/02998_to_milliseconds.sql b/tests/queries/0_stateless/02998_to_milliseconds.sql new file mode 100644 index 00000000000..f159f6aab50 --- /dev/null +++ b/tests/queries/0_stateless/02998_to_milliseconds.sql @@ -0,0 +1,17 @@ +-- Negative tests +SELECT toMillisecond(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT toMillisecond('string'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toMillisecond(toDate('2024-02-28')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toMillisecond(toDate32('2024-02-28')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- Tests with constant and non-constant arguments +SELECT toDateTime('2023-04-21 10:20:30') AS dt, toMillisecond(dt), toMillisecond(materialize(dt)); +SELECT toDateTime64('2023-04-21 10:20:30', 0) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); +SELECT toDateTime64('2023-04-21 10:20:30.123', 3) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); +SELECT toDateTime64('2023-04-21 10:20:30.123456', 6) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); +SELECT toDateTime64('2023-04-21 10:20:30.123456789', 9) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); + +-- Special cases +SELECT MILLISECOND(toDateTime64('2023-04-21 10:20:30.123456', 2)); -- Alias +SELECT toNullable(toDateTime('2023-04-21 10:20:30')) AS dt, toMillisecond(dt); -- Nullable +SELECT toLowCardinality(toDateTime('2023-04-21 10:20:30')) AS dt, toMillisecond(dt); -- LowCardinality diff --git a/tests/queries/0_stateless/02999_analyzer_preimage_null.reference b/tests/queries/0_stateless/02999_analyzer_preimage_null.reference index 6f9afedfd07..c5eb6b23d0d 100644 --- a/tests/queries/0_stateless/02999_analyzer_preimage_null.reference +++ b/tests/queries/0_stateless/02999_analyzer_preimage_null.reference @@ -108,14 +108,14 @@ QUERY id: 0 FUNCTION id: 14, function_name: greaterOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 15, nodes: 2 - COLUMN id: 16, column_name: date1, result_type: Date, source_id: 3 - CONSTANT id: 17, constant_value: \'1994-01-01\', constant_value_type: String - FUNCTION id: 18, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 + CONSTANT id: 16, constant_value: \'1994-01-01\', constant_value_type: String + FUNCTION id: 17, function_name: lessOrEquals, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: id, result_type: UInt32, source_id: 3 - FUNCTION id: 21, function_name: toYear, function_type: ordinary, result_type: UInt16 + LIST id: 18, nodes: 2 + COLUMN id: 19, column_name: id, result_type: UInt32, source_id: 3 + FUNCTION id: 20, function_name: toYear, function_type: ordinary, result_type: UInt16 ARGUMENTS - LIST id: 22, nodes: 1 - COLUMN id: 23, column_name: date1, result_type: Date, source_id: 3 + LIST id: 21, nodes: 1 + COLUMN id: 12, column_name: date1, result_type: Date, source_id: 3 SETTINGS optimize_time_filter_with_preimage=1 diff --git a/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.reference b/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.reference new file mode 100644 index 00000000000..0740afe92c6 --- /dev/null +++ b/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.reference @@ -0,0 +1,66 @@ +0 0 +0 0 +0 0 +0 0 +1 \N +1 \N +2 \N +2 \N +3 \N +3 \N +4 \N +4 
\N +5 \N +5 \N +6 \N +6 \N +7 \N +7 \N +8 \N +8 \N +9 \N +9 \N +10 10 +10 10 +10 10 +10 10 +11 \N +11 \N +12 \N +12 \N +13 \N +13 \N +14 \N +14 \N +15 \N +15 \N +16 \N +16 \N +17 \N +17 \N +18 \N +18 \N +19 \N +19 \N +20 20 +20 20 +20 20 +20 20 +21 \N +21 \N +22 \N +22 \N +23 \N +23 \N +24 \N +24 \N +25 \N +25 \N +26 \N +26 \N +27 \N +27 \N +28 \N +28 \N +29 \N +29 \N diff --git a/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.sql b/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.sql new file mode 100644 index 00000000000..88bcdeb7f77 --- /dev/null +++ b/tests/queries/0_stateless/02999_scalar_subqueries_bug_1.sql @@ -0,0 +1,8 @@ +drop table if exists t_table_select; +CREATE TABLE t_table_select (id UInt32) ENGINE = MergeTree ORDER BY id; +INSERT INTO t_table_select (id) SELECT number FROM numbers(30); + +CREATE TEMPORARY TABLE t_test (x UInt32, y Nullable(UInt32)) AS SELECT a.id, b.id FROM remote('127.0.0.{1,2}', currentDatabase(), t_table_select) AS a GLOBAL LEFT JOIN (SELECT id FROM remote('127.0.0.{1,2}', currentDatabase(), t_table_select) AS b WHERE (b.id % 10) = 0) AS b ON b.id = a.id SETTINGS join_use_nulls = 1; + +select * from t_test order by x; + diff --git a/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.reference b/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.sql b/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.sql new file mode 100644 index 00000000000..03ac91e401a --- /dev/null +++ b/tests/queries/0_stateless/02999_scalar_subqueries_bug_2.sql @@ -0,0 +1,18 @@ +drop table if exists source; +drop table if exists target1; +drop table if exists target2; +drop table if exists v_heavy; + + +create table source(type String) engine=MergeTree order by type; + +create view v_heavy as +with nums as (select number from numbers(1e5)) +select count(*) n from (select number from numbers(1e5) n1 cross join nums); + +create table target1(type String) engine=MergeTree order by type; +create table target2(type String) engine=MergeTree order by type; + +set max_execution_time=2; +-- we should not execute scalar subquery here +create materialized view vm_target2 to target2 as select * from source where type='two' and (select sum(sleepEachRow(0.1)) from numbers(30)); diff --git a/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.reference b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.reference new file mode 100644 index 00000000000..06b63ea6c2f --- /dev/null +++ b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.reference @@ -0,0 +1 @@ +0 0 0 diff --git a/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql new file mode 100644 index 00000000000..d57db9151b9 --- /dev/null +++ b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql @@ -0,0 +1,10 @@ +drop table if exists x; + +create table x (i int, j int, k int) engine MergeTree order by tuple() settings index_granularity=8192, index_granularity_bytes = '10Mi', min_bytes_for_wide_part=0, min_rows_for_wide_part=0, ratio_of_defaults_for_sparse_serialization=1; + +insert into x select number, number * 2, number * 3 from numbers(100000); + +-- One granule, (_part_offset (8 bytes) + (4 bytes)) * 8192 + (8 bytes) * 1 = 98312 +select * from x prewhere _part_offset = 0 settings max_bytes_to_read = 98312; + +drop table x; diff --git 
a/tests/queries/0_stateless/03003_prql_panic.reference b/tests/queries/0_stateless/03003_prql_panic.reference new file mode 100644 index 00000000000..8e0782c44f2 --- /dev/null +++ b/tests/queries/0_stateless/03003_prql_panic.reference @@ -0,0 +1 @@ +SYNTAX_ERROR diff --git a/tests/queries/0_stateless/03003_prql_panic.sh b/tests/queries/0_stateless/03003_prql_panic.sh new file mode 100755 index 00000000000..01d7beed99b --- /dev/null +++ b/tests/queries/0_stateless/03003_prql_panic.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Requires Rust, which is not built for Fast Test. + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Before [1] this causes a panic, but it will be fixed soon, so do not check +# for the panic itself, only for SYNTAX_ERROR. +# +# [1]: https://github.com/PRQL/prql/pull/4285 +$CLICKHOUSE_CLIENT --dialect prql -q "SELECT id FROM distributed_test_table GROUP BY x -> concat(concat(materialize(toNullable(NULL)))) LIMIT 3" |& grep -o -m1 SYNTAX_ERROR diff --git a/tests/queries/0_stateless/03006_analyzer_executable_table_function.reference b/tests/queries/0_stateless/03006_analyzer_executable_table_function.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03006_analyzer_executable_table_function.sql b/tests/queries/0_stateless/03006_analyzer_executable_table_function.sql new file mode 100644 index 00000000000..4edbcc97f50 --- /dev/null +++ b/tests/queries/0_stateless/03006_analyzer_executable_table_function.sql @@ -0,0 +1,4 @@ +SELECT + toFixedString(toFixedString(toLowCardinality(toFixedString('--------------------', toNullable(20))), toLowCardinality(20)), 20), + * +FROM executable('data String', SETTINGS max_command_execution_time = 100); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } diff --git a/tests/queries/0_stateless/03006_async_insert_deadlock_log.reference b/tests/queries/0_stateless/03006_async_insert_deadlock_log.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03006_async_insert_deadlock_log.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03006_async_insert_deadlock_log.sh b/tests/queries/0_stateless/03006_async_insert_deadlock_log.sh new file mode 100755 index 00000000000..f01c34cdbda --- /dev/null +++ b/tests/queries/0_stateless/03006_async_insert_deadlock_log.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query "CREATE TABLE t_async_insert_deadlock (a UInt64) ENGINE = Log" + +echo '{"a": 1}' | $CLICKHOUSE_CLIENT --async_insert 1 --wait_for_async_insert 1 --query "INSERT INTO t_async_insert_deadlock FORMAT JSONEachRow" + +$CLICKHOUSE_CLIENT --query "SELECT * FROM t_async_insert_deadlock ORDER BY a" +$CLICKHOUSE_CLIENT --query "DROP TABLE t_async_insert_deadlock" diff --git a/tests/queries/0_stateless/03006_buffer_overflow_join.reference b/tests/queries/0_stateless/03006_buffer_overflow_join.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03006_buffer_overflow_join.sql b/tests/queries/0_stateless/03006_buffer_overflow_join.sql new file mode 100644 index 00000000000..8c1fa3cecc0 --- /dev/null +++ b/tests/queries/0_stateless/03006_buffer_overflow_join.sql @@ -0,0 +1,6 @@ +CREATE TABLE 03006_buffer_overflow_l (`a` String, `b` Tuple(String, String)) ENGINE = Memory; +INSERT INTO 03006_buffer_overflow_l SELECT * FROM generateRandom() limit 1000; +CREATE TABLE 03006_buffer_overflow_r (`a` LowCardinality(Nullable(String)), `c` Tuple(LowCardinality(String), LowCardinality(String))) ENGINE = Memory; +INSERT INTO 03006_buffer_overflow_r SELECT * FROM generateRandom() limit 1000; + +SELECT a FROM 03006_buffer_overflow_l RIGHT JOIN 03006_buffer_overflow_r USING (a) ORDER BY a ASC NULLS FIRST FORMAT Null; diff --git a/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.reference b/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.sql b/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.sql new file mode 100644 index 00000000000..808317c917e --- /dev/null +++ b/tests/queries/0_stateless/03006_mv_deduplication_throw_if_async_insert.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS 02985_test; + +SET async_insert = 1; +SET deduplicate_blocks_in_dependent_materialized_views = 1; + +CREATE TABLE 03006_test +( + d Date, + value UInt64 +) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO 03006_test VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); -- { serverError SUPPORT_IS_DISABLED } +INSERT INTO 03006_test SETTINGS compatibility='24.1' VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); +INSERT INTO 03006_test SETTINGS async_insert=0 VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); +INSERT INTO 03006_test SETTINGS deduplicate_blocks_in_dependent_materialized_views=0 VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); +INSERT INTO 03006_test SETTINGS throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert=0 VALUES ('2024-03-05', 1), ('2024-03-05', 2), ('2024-03-05', 1); + +DROP TABLE IF EXISTS 02985_test; diff --git a/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.reference b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.reference new file mode 100644 index 00000000000..bead7ee1474 --- /dev/null +++ b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.reference @@ -0,0 +1,3 @@ +\N 1000 + +\N 1000 diff --git a/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.sql b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.sql new file mode 100644 index 00000000000..9479044e0e0 --- /dev/null +++ 
b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.sql @@ -0,0 +1 @@ +SELECT count(NULL) IGNORE NULLS > avg(toDecimal32(NULL)) IGNORE NULLS, count() FROM numbers(1000) WITH TOTALS SETTINGS allow_experimental_analyzer = 1; diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 3614bcb7452..8bf12247ef2 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2545,6 +2545,7 @@ toRelativeSecondNum toRelativeWeekNum toRelativeYearNum toSecond +toMillisecond toStartOfDay toStartOfFifteenMinutes toStartOfFiveMinutes diff --git a/utils/check-style/check-pylint b/utils/check-style/check-pylint new file mode 100755 index 00000000000..7959a414023 --- /dev/null +++ b/utils/check-style/check-pylint @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + + +LC_ALL="en_US.UTF-8" +ROOT_PATH=$(git rev-parse --show-toplevel) + +function xargs-pylint { + # $1 is the maximum number of arguments per pylint process + sort | awk '$2=="text/x-script.python" {print $1}' | \ + xargs -P "$(nproc)" -n "$1" pylint --rcfile="$ROOT_PATH/pyproject.toml" --persistent=no --score=n +} + +find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 50 +# Beware, the lambdas there are checked as well. All of them contain `app`, and it causes brain-cucumber-zalgo +find "$ROOT_PATH/tests/ci" -mindepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 1 diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 3a5d0c053ea..d7387c3f843 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -152,16 +152,6 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' | grep -vP $EXCLUDE_DIRS | xargs xmllint --noout --nonet -function xargs-pylint { - # $1 is number maximum arguments per pylint process - sort | awk '$2=="text/x-script.python" {print $1}' | \ - xargs -P "$(nproc)" -n "$1" pylint --rcfile="$ROOT_PATH/pyproject.toml" --persistent=no --score=n -} - -find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 50 -# Beware, there lambdas are checked. 
All of them contain `app`, and it causes brain-cucumber-zalgo -find "$ROOT_PATH/tests/ci" -mindepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 1 - find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f | grep -vP $EXCLUDE_DIRS | xargs yamllint --config-file=$ROOT_PATH/.yamllint diff --git a/utils/check-style/check_cpp_docs.sh b/utils/check-style/check_cpp_docs.sh new file mode 100755 index 00000000000..7ad3cede758 --- /dev/null +++ b/utils/check-style/check_cpp_docs.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# yaml check is not the best one + +cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv + +# FIXME: 30 sec to wait +# echo "Check duplicates" | ts +# ./check-duplicate-includes.sh |& tee /test_output/duplicate_includes_output.txt + +echo "Check style" | ts +./check-style -n |& tee /test_output/style_output.txt +echo "Check typos" | ts +./check-typos |& tee /test_output/typos_output.txt +echo "Check docs spelling" | ts +./check-doc-aspell |& tee /test_output/docs_spelling_output.txt +echo "Check whitespaces" | ts +./check-whitespaces -n |& tee /test_output/whitespaces_output.txt +echo "Check workflows" | ts +./check-workflows |& tee /test_output/workflows_output.txt +echo "Check submodules" | ts +./check-submodules |& tee /test_output/submodules_output.txt +echo "Check style. Done" | ts + +# FIXME: 6 min to wait +# echo "Check shell scripts with shellcheck" | ts +# ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt + + +# FIXME: move out +# /process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +# echo "Check help for changelog generator works" | ts +# cd ../changelog || exit 1 +# ./changelog.py -h 2>/dev/null 1>&2 diff --git a/utils/check-style/check_py.sh b/utils/check-style/check_py.sh new file mode 100755 index 00000000000..48c02013734 --- /dev/null +++ b/utils/check-style/check_py.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# yaml check is not the best one + +cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv + +# FIXME: 1 min to wait + head checkout +# echo "Check python formatting with black" | ts +# ./check-black -n |& tee /test_output/black_output.txt + +echo "Check pylint" | ts +./check-pylint -n |& tee /test_output/pylint_output.txt +echo "Check pylint. Done" | ts + +echo "Check python type hinting with mypy" | ts +./check-mypy -n |& tee /test_output/mypy_output.txt +echo "Check python type hinting with mypy. Done" | ts diff --git a/docker/test/style/process_style_check_result.py b/utils/check-style/process_style_check_result.py similarity index 96% rename from docker/test/style/process_style_check_result.py rename to utils/check-style/process_style_check_result.py index bc06df1af31..7980c01dd37 100755 --- a/docker/test/style/process_style_check_result.py +++ b/utils/check-style/process_style_check_result.py @@ -13,10 +13,11 @@ def process_result(result_folder): description = "" test_results = [] checks = ( - "duplicate includes", - "shellcheck", + #"duplicate includes", + #"shellcheck", "style", - "black", + "pylint", + #"black", "mypy", "typos", "whitespaces",