diff --git a/README.md b/README.md index fcbe65e8223..17b4df154a9 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ curl https://clickhouse.com/ | sh * [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster. * [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information. * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. -* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. +* [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. * [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events. * [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation. * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev. diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index ae6f270a768..4181f916d63 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -115,6 +115,13 @@ configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/A # ARROW_ORC + adapters/orc/CMakefiles set(ORC_SRCS + "${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h" + "${ORC_SOURCE_SRC_DIR}/sargs/ExpressionTree.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/Literal.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/PredicateLeaf.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/SargsApplier.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/SearchArgument.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/TruthValue.cc" "${ORC_SOURCE_SRC_DIR}/Exceptions.cc" "${ORC_SOURCE_SRC_DIR}/OrcFile.cc" "${ORC_SOURCE_SRC_DIR}/Reader.cc" @@ -129,13 +136,20 @@ set(ORC_SRCS "${ORC_SOURCE_SRC_DIR}/MemoryPool.cc" "${ORC_SOURCE_SRC_DIR}/RLE.cc" "${ORC_SOURCE_SRC_DIR}/RLEv1.cc" - "${ORC_SOURCE_SRC_DIR}/RLEv2.cc" + "${ORC_SOURCE_SRC_DIR}/RleDecoderV2.cc" + "${ORC_SOURCE_SRC_DIR}/RleEncoderV2.cc" + "${ORC_SOURCE_SRC_DIR}/RLEV2Util.cc" "${ORC_SOURCE_SRC_DIR}/Statistics.cc" "${ORC_SOURCE_SRC_DIR}/StripeStream.cc" "${ORC_SOURCE_SRC_DIR}/Timezone.cc" "${ORC_SOURCE_SRC_DIR}/TypeImpl.cc" "${ORC_SOURCE_SRC_DIR}/Vector.cc" "${ORC_SOURCE_SRC_DIR}/Writer.cc" + "${ORC_SOURCE_SRC_DIR}/Adaptor.cc" + "${ORC_SOURCE_SRC_DIR}/BloomFilter.cc" + "${ORC_SOURCE_SRC_DIR}/Murmur3.cc" + "${ORC_SOURCE_SRC_DIR}/BlockBuffer.cc" + "${ORC_SOURCE_SRC_DIR}/wrap/orc-proto-wrapper.cc" "${ORC_SOURCE_SRC_DIR}/io/InputStream.cc" "${ORC_SOURCE_SRC_DIR}/io/OutputStream.cc" "${ORC_ADDITION_SOURCE_DIR}/orc_proto.pb.cc" @@ -358,6 +372,9 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS}) add_definitions(-DARROW_WITH_ZSTD) SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS}) +add_definitions(-DARROW_WITH_BROTLI) +SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_brotli.cc" ${ARROW_SRCS}) + add_library(_arrow ${ARROW_SRCS}) @@ -372,6 +389,7 @@ target_link_libraries(_arrow PRIVATE ch_contrib::snappy ch_contrib::zlib ch_contrib::zstd + ch_contrib::brotli ) target_link_libraries(_arrow PUBLIC _orc) diff --git a/contrib/orc b/contrib/orc index f9a393ed243..c5d7755ba0b 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit 
f9a393ed2433a60034795284f82d093b348f2102 +Subproject commit c5d7755ba0b9a95631c8daea4d094101f26ec761 diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 500249b5bd6..34c1406b687 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -29,7 +29,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.2.3.17" +ARG VERSION="23.2.4.12" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 5f613eea4d0..f4ca498a7e7 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.2.3.17" +ARG VERSION="23.2.4.12" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 3e99bca8bc2..13b3ebdb01c 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.2.3.17" +ARG VERSION="23.2.4.12" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index ce8a56c777e..f637ea24df3 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -60,6 +60,13 @@ install_packages previous_release_package_folder export USE_S3_STORAGE_FOR_MERGE_TREE=1 # Previous version may not be ready for fault injections export ZOOKEEPER_FAULT_INJECTION=0 + +# force_sync=false doesn't work correctly on some older versions +sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ + | sed "s|false|true|" \ + > /etc/clickhouse-server/config.d/keeper_port.xml.tmp +sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml + configure # But we still need default disk because some tables loaded only into it @@ -161,7 +168,9 @@ rg -Fav -e "Code: 236. 
DB::Exception: Cancelled merging parts" \ -e "Authentication failed" \ -e "Cannot flush" \ -e "Container already exists" \ - /var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "" > /test_output/upgrade_error_messages.txt \ + clickhouse-server.upgrade.log \ + | grep -av -e "_repl_01111_.*Mapping for table with UUID" \ + | zgrep -Fa "" > /test_output/upgrade_error_messages.txt \ && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \ >> /test_output/test_results.tsv \ || echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv diff --git a/docs/changelogs/v22.12.5.34-stable.md b/docs/changelogs/v22.12.5.34-stable.md new file mode 100644 index 00000000000..95befaa88ff --- /dev/null +++ b/docs/changelogs/v22.12.5.34-stable.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.12.5.34-stable (b82d6401ca1) FIXME as compared to v22.12.4.76-stable (cb5772db805) + +#### Improvement +* Backported in [#46983](https://github.com/ClickHouse/ClickHouse/issues/46983): - Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas. Because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#45729](https://github.com/ClickHouse/ClickHouse/issues/45729): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#46398](https://github.com/ClickHouse/ClickHouse/issues/46398): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#46903](https://github.com/ClickHouse/ClickHouse/issues/46903): - Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#47210](https://github.com/ClickHouse/ClickHouse/issues/47210): `INSERT` queries through native TCP protocol and HTTP protocol were not canceled correctly in some cases. It could lead to a partially applied query if a client canceled the query, or if a client died or, in rare cases, on network errors. As a result, it could lead to not working deduplication. Fixes [#27667](https://github.com/ClickHouse/ClickHouse/issues/27667) and [#45377](https://github.com/ClickHouse/ClickHouse/issues/45377). [#46681](https://github.com/ClickHouse/ClickHouse/pull/46681) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#47157](https://github.com/ClickHouse/ClickHouse/issues/47157): - Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#46881](https://github.com/ClickHouse/ClickHouse/issues/46881): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). 
[#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#47359](https://github.com/ClickHouse/ClickHouse/issues/47359): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v22.8.15.23-lts.md b/docs/changelogs/v22.8.15.23-lts.md new file mode 100644 index 00000000000..096a504c9c2 --- /dev/null +++ b/docs/changelogs/v22.8.15.23-lts.md @@ -0,0 +1,28 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.15.23-lts (d36fa168bbf) FIXME as compared to v22.8.14.53-lts (4ea67c40077) + +#### Improvement +* Backported in [#46981](https://github.com/ClickHouse/ClickHouse/issues/46981): - Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas. Because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)). + +#### Bug Fix +* Backported in [#47336](https://github.com/ClickHouse/ClickHouse/issues/47336): Sometimes after changing a role that could be not reflected on the access rights of a user who uses that role. This PR fixes that. [#46772](https://github.com/ClickHouse/ClickHouse/pull/46772) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#46901](https://github.com/ClickHouse/ClickHouse/issues/46901): - Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#47156](https://github.com/ClickHouse/ClickHouse/issues/47156): - Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#46987](https://github.com/ClickHouse/ClickHouse/issues/46987): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#47357](https://github.com/ClickHouse/ClickHouse/issues/47357): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.1.5.24-stable.md b/docs/changelogs/v23.1.5.24-stable.md new file mode 100644 index 00000000000..1c2c127a8c3 --- /dev/null +++ b/docs/changelogs/v23.1.5.24-stable.md @@ -0,0 +1,28 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.1.5.24-stable (0e51b53ba99) FIXME as compared to v23.1.4.58-stable (9ed562163a5) + +#### Build/Testing/Packaging Improvement +* Backported in [#47060](https://github.com/ClickHouse/ClickHouse/issues/47060): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#46401](https://github.com/ClickHouse/ClickHouse/issues/46401): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#46905](https://github.com/ClickHouse/ClickHouse/issues/46905): - Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#47211](https://github.com/ClickHouse/ClickHouse/issues/47211): `INSERT` queries through native TCP protocol and HTTP protocol were not canceled correctly in some cases. It could lead to a partially applied query if a client canceled the query, or if a client died or, in rare cases, on network errors. As a result, it could lead to not working deduplication. Fixes [#27667](https://github.com/ClickHouse/ClickHouse/issues/27667) and [#45377](https://github.com/ClickHouse/ClickHouse/issues/45377). [#46681](https://github.com/ClickHouse/ClickHouse/pull/46681) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#47118](https://github.com/ClickHouse/ClickHouse/issues/47118): - Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#46883](https://github.com/ClickHouse/ClickHouse/issues/46883): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#47361](https://github.com/ClickHouse/ClickHouse/issues/47361): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.2.4.12-stable.md b/docs/changelogs/v23.2.4.12-stable.md new file mode 100644 index 00000000000..2b6a689aee5 --- /dev/null +++ b/docs/changelogs/v23.2.4.12-stable.md @@ -0,0 +1,20 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.2.4.12-stable (8fe866cb035) FIXME as compared to v23.2.3.17-stable (dec18bf7281) + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#47277](https://github.com/ClickHouse/ClickHouse/issues/47277): Fix IPv4/IPv6 serialization/deserialization in binary formats that was broken in https://github.com/ClickHouse/ClickHouse/pull/43221. Closes [#46522](https://github.com/ClickHouse/ClickHouse/issues/46522). [#46616](https://github.com/ClickHouse/ClickHouse/pull/46616) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#47212](https://github.com/ClickHouse/ClickHouse/issues/47212): `INSERT` queries through native TCP protocol and HTTP protocol were not canceled correctly in some cases. It could lead to a partially applied query if a client canceled the query, or if a client died or, in rare cases, on network errors. As a result, it could lead to not working deduplication. Fixes [#27667](https://github.com/ClickHouse/ClickHouse/issues/27667) and [#45377](https://github.com/ClickHouse/ClickHouse/issues/45377). [#46681](https://github.com/ClickHouse/ClickHouse/pull/46681) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#47363](https://github.com/ClickHouse/ClickHouse/issues/47363): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/en/development/build-cross-s390x.md b/docs/en/development/build-cross-s390x.md new file mode 100644 index 00000000000..bfb94ef7ed0 --- /dev/null +++ b/docs/en/development/build-cross-s390x.md @@ -0,0 +1,123 @@ +--- +slug: /en/development/build-cross-s390x +sidebar_position: 69 +title: How to Build, Run and Debug ClickHouse on Linux for s390x (zLinux) +sidebar_label: Build on Linux for s390x (zLinux) +--- + +As of this writing (2023/3/10), building for s390x is considered experimental: not all features can be enabled, some features are broken, and the port is under active development. + + +## Building + +Since s390x does not support BoringSSL, the s390x build uses OpenSSL instead and has two related build options. +- By default, the s390x build will dynamically link to OpenSSL libraries. It will build OpenSSL shared objects, so it's not necessary to install OpenSSL beforehand. (This option is recommended in all cases.) +- Another option is to build OpenSSL in-tree.
In this case, two build flags need to be supplied to cmake: +```bash +-DENABLE_OPENSSL_DYNAMIC=0 -DENABLE_OPENSSL=1 +``` + +These instructions assume that the host machine is x86_64 and has all the tooling required to build natively based on the [build instructions](../development/build.md). They also assume that the host is Ubuntu 22.04, but the following instructions should also work on Ubuntu 20.04. + +In addition to installing the tooling used to build natively, the following additional packages need to be installed: + +```bash +apt-get install binutils-s390x-linux-gnu libc6-dev-s390x-cross gcc-s390x-linux-gnu binfmt-support qemu-user-static +``` + +If you wish to cross-compile Rust code, install the Rust cross-compilation target for s390x: +```bash +rustup target add s390x-unknown-linux-gnu +``` + +To build for s390x: +```bash +cmake -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-s390x.cmake .. +ninja +``` + +## Running + +Once built, the binary can be run with, e.g.: + +```bash +qemu-s390x-static -L /usr/s390x-linux-gnu ./clickhouse +``` + +## Debugging + +Install LLDB: + +```bash +apt-get install lldb-15 +``` + +To debug an s390x executable, run clickhouse using QEMU in debug mode: + +```bash +qemu-s390x-static -g 31338 -L /usr/s390x-linux-gnu ./clickhouse +``` + +In another shell, run LLDB and attach; replace `` and `` with the values corresponding to your environment. +```bash +lldb-15 +(lldb) target create ./clickhouse +Current executable set to '//ClickHouse//programs/clickhouse' (s390x). +(lldb) settings set target.source-map //ClickHouse +(lldb) gdb-remote 31338 +Process 1 stopped +* thread #1, stop reason = signal SIGTRAP + frame #0: 0x0000004020e74cd0 +-> 0x4020e74cd0: lgr %r2, %r15 + 0x4020e74cd4: aghi %r15, -160 + 0x4020e74cd8: xc 0(8,%r15), 0(%r15) + 0x4020e74cde: brasl %r14, 275429939040 +(lldb) b main +Breakpoint 1: 9 locations. +(lldb) c +Process 1 resuming +Process 1 stopped +* thread #1, stop reason = breakpoint 1.1 + frame #0: 0x0000004005cd9fc0 clickhouse`main(argc_=1, argv_=0x0000004020e594a8) at main.cpp:450:17 + 447 #if !defined(FUZZING_MODE) + 448 int main(int argc_, char ** argv_) + 449 { +-> 450 inside_main = true; + 451 SCOPE_EXIT({ inside_main = false; }); + 452 + 453 /// PHDR cache is required for query profiler to work reliably +``` + +## Visual Studio Code integration + +- The [CodeLLDB extension](https://github.com/vadimcn/vscode-lldb) is required for visual debugging; the [Command Variable](https://github.com/rioj7/command-variable) extension can help with dynamic launches if using [CMake Variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md). +- Make sure to set the backend to your LLVM installation, e.g. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"` +- Launcher: +```json +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Debug", + "type": "lldb", + "request": "custom", + "targetCreateCommands": ["target create ${command:cmake.launchTargetDirectory}/clickhouse"], + "processCreateCommands": ["settings set target.source-map ${input:targetdir} ${workspaceFolder}", "gdb-remote 31338"], + "sourceMap": { "${input:targetdir}": "${workspaceFolder}" } + } + ], + "inputs": [ + { + "id": "targetdir", + "type": "command", + "command": "extension.commandvariable.transform", + "args": { + "text": "${command:cmake.launchTargetDirectory}", + "find": ".*/([^/]+)/[^/]+$", + "replace": "$1" + } + } + ] +} +``` +- Make sure to run the clickhouse executable in debug mode prior to launch.
(It is also possible to create a `preLaunchTask` that automates this) \ No newline at end of file diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 04158a0c3f7..4b296c43db4 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -1,6 +1,6 @@ --- slug: /en/development/contrib -sidebar_position: 71 +sidebar_position: 72 sidebar_label: Third-Party Libraries description: A list of third-party libraries used --- diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 35683aa7822..78b1d1e9ebb 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -1,6 +1,6 @@ --- slug: /en/development/style -sidebar_position: 69 +sidebar_position: 70 sidebar_label: C++ Guide description: A list of recommendations regarding coding style, naming convention, formatting and more --- diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 9ae49e8f707..eb424ee7cbe 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -1,6 +1,6 @@ --- slug: /en/development/tests -sidebar_position: 70 +sidebar_position: 71 sidebar_label: Testing title: ClickHouse Testing description: Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way. @@ -31,6 +31,9 @@ folder and run the following command: PATH=$PATH: tests/clickhouse-test 01428_hash_set_nan_key ``` +Test results (`stderr` and `stdout`) are written to files `01428_hash_set_nan_key.[stderr|stdout]` which +are located near the test file itself (so for `queries/0_stateless/foo.sql` output will be in `queries/0_stateless/foo.stdout`). + For more options, see `tests/clickhouse-test --help`. You can simply run all tests or run subset of tests filtered by substring in test name: `./clickhouse-test substring`. There are also options to run tests in parallel or in randomized order. ### Adding a New Test diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index 69098f63037..9730faa873c 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -5,17 +5,19 @@ sidebar_position: 2 description: Data for billions of taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009 --- +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # New York Taxi Data The New York taxi data consists of 3+ billion taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009. The dataset can be obtained in a couple of ways: -- insert the data directly into ClickHouse Cloud from S3 +- insert the data directly into ClickHouse Cloud from S3 or GCS - download prepared partitions -## Retrieve the Data from S3 +## Create the table trips -Let's grab a small subset of the data for getting familiar with it. The data is in TSV files in AWS S3, which is easily streamed into -ClickHouse Cloud using the `s3` table function. Start by creating a table for the taxi rides: +Start by creating a table for the taxi rides: ```sql CREATE TABLE trips ( @@ -38,9 +40,50 @@ CREATE TABLE trips ( dropoff_ntaname LowCardinality(String) ) ENGINE = MergeTree -PRIMARY KEY (pickup_datetime, dropoff_datetime) +PRIMARY KEY (pickup_datetime, dropoff_datetime); ``` +## Load the Data directly from Object Storage + +Let's grab a small subset of the data for getting familiar with it. 
The data is in TSV files in object storage, which is easily streamed into +ClickHouse Cloud using the `s3` table function. + +The same data is stored in both S3 and GCS; choose either tab. + + + + +The following command streams three files from a GCS bucket into the `trips` table (the `{0..2}` syntax is a wildcard for the values 0, 1, and 2): + +```sql +INSERT INTO trips +SELECT + trip_id, + pickup_datetime, + dropoff_datetime, + pickup_longitude, + pickup_latitude, + dropoff_longitude, + dropoff_latitude, + passenger_count, + trip_distance, + fare_amount, + extra, + tip_amount, + tolls_amount, + total_amount, + payment_type, + pickup_ntaname, + dropoff_ntaname +FROM s3( + 'https://storage.googleapis.com/clickhouse-public-datasets/nyc-taxi/trips_{0..2}.gz', + 'TabSeparatedWithNames' +); +``` + + + + The following command streams three files from an S3 bucket into the `trips` table (the `{0..2}` syntax is a wildcard for the values 0, 1, and 2): ```sql @@ -66,14 +109,19 @@ SELECT FROM s3( 'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_{0..2}.gz', 'TabSeparatedWithNames' -) +); ``` + + + +## Sample Queries + Let's see how many rows were inserted: ```sql SELECT count() -FROM trips +FROM trips; ``` Each TSV file has about 1M rows, and the three files have 3,000,317 rows. Let's look at a few rows: @@ -81,7 +129,7 @@ Each TSV file has about 1M rows, and the three files have 3,000,317 rows. Let's ```sql SELECT * FROM trips -LIMIT 10 +LIMIT 10; ``` Notice there are columns for the pickup and dropoff dates, geo coordinates, fare details, New York neighborhoods, and more: @@ -110,7 +158,7 @@ SELECT FROM trips GROUP BY pickup_ntaname ORDER BY count DESC -LIMIT 10 +LIMIT 10; ``` The result is: @@ -137,7 +185,7 @@ SELECT passenger_count, avg(total_amount) FROM trips -GROUP BY passenger_count +GROUP BY passenger_count; ``` ```response @@ -165,7 +213,7 @@ SELECT count(*) FROM trips GROUP BY passenger_count, year, distance -ORDER BY year, count(*) DESC +ORDER BY year, count(*) DESC; ``` The first part of the result is: @@ -189,6 +237,10 @@ The first part of the result is: ## Download of Prepared Partitions {#download-of-prepared-partitions} +:::note +The following steps provide information about the original dataset, and a method for loading prepared partitions into a self-managed ClickHouse server environment. +::: + See https://github.com/toddwschneider/nyc-taxi-data and http://tech.marksblogg.com/billion-nyc-taxi-rides-redshift.html for the description of a dataset and instructions for downloading. Downloading will result in about 227 GB of uncompressed data in CSV files. The download takes about an hour over a 1 Gbit connection (parallel downloading from s3.amazonaws.com recovers at least half of a 1 Gbit channel). @@ -211,7 +263,7 @@ If you will run the queries described below, you have to use the full table name Q1: ``` sql -SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type +SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type; ``` 0.490 seconds. @@ -219,7 +271,7 @@ SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type Q2: ``` sql -SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenger_count +SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenger_count; ``` 1.224 seconds. 
@@ -227,7 +279,7 @@ SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenge Q3: ``` sql -SELECT passenger_count, toYear(pickup_date) AS year, count(*) FROM trips_mergetree GROUP BY passenger_count, year +SELECT passenger_count, toYear(pickup_date) AS year, count(*) FROM trips_mergetree GROUP BY passenger_count, year; ``` 2.104 seconds. @@ -238,7 +290,7 @@ Q4: SELECT passenger_count, toYear(pickup_date) AS year, round(trip_distance) AS distance, count(*) FROM trips_mergetree GROUP BY passenger_count, year, distance -ORDER BY year, count(*) DESC +ORDER BY year, count(*) DESC; ``` 3.593 seconds. @@ -254,19 +306,19 @@ Creating a table on three servers: On each server: ``` sql -CREATE TABLE default.trips_mergetree_third ( trip_id UInt32, vendor_id Enum8('1' = 1, '2' = 2, 'CMT' = 3, 'VTS' = 4, 'DDS' = 5, 'B02512' = 10, 'B02598' = 11, 'B02617' = 12, 'B02682' = 13, 'B02764' = 14), pickup_date Date, pickup_datetime DateTime, dropoff_date Date, dropoff_datetime DateTime, store_and_fwd_flag UInt8, rate_code_id UInt8, pickup_longitude Float64, pickup_latitude Float64, dropoff_longitude Float64, dropoff_latitude Float64, passenger_count UInt8, trip_distance Float64, fare_amount Float32, extra Float32, mta_tax Float32, tip_amount Float32, tolls_amount Float32, ehail_fee Float32, improvement_surcharge Float32, total_amount Float32, payment_type_ Enum8('UNK' = 0, 'CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4), trip_type UInt8, pickup FixedString(25), dropoff FixedString(25), cab_type Enum8('yellow' = 1, 'green' = 2, 'uber' = 3), pickup_nyct2010_gid UInt8, pickup_ctlabel Float32, pickup_borocode UInt8, pickup_boroname Enum8('' = 0, 'Manhattan' = 1, 'Bronx' = 2, 'Brooklyn' = 3, 'Queens' = 4, 'Staten Island' = 5), pickup_ct2010 FixedString(6), pickup_boroct2010 FixedString(7), pickup_cdeligibil Enum8(' ' = 0, 'E' = 1, 'I' = 2), pickup_ntacode FixedString(4), pickup_ntaname Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' 
= 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. 
George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195), pickup_puma UInt16, dropoff_nyct2010_gid UInt8, dropoff_ctlabel Float32, dropoff_borocode UInt8, dropoff_boroname Enum8('' = 0, 'Manhattan' = 1, 'Bronx' = 2, 'Brooklyn' = 3, 'Queens' = 4, 'Staten Island' = 5), dropoff_ct2010 FixedString(6), dropoff_boroct2010 FixedString(7), dropoff_cdeligibil Enum8(' ' = 0, 'E' = 1, 'I' = 2), dropoff_ntacode FixedString(4), dropoff_ntaname Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. 
Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. 
George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195), dropoff_puma UInt16) ENGINE = MergeTree(pickup_date, pickup_datetime, 8192) +CREATE TABLE default.trips_mergetree_third ( trip_id UInt32, vendor_id Enum8('1' = 1, '2' = 2, 'CMT' = 3, 'VTS' = 4, 'DDS' = 5, 'B02512' = 10, 'B02598' = 11, 'B02617' = 12, 'B02682' = 13, 'B02764' = 14), pickup_date Date, pickup_datetime DateTime, dropoff_date Date, dropoff_datetime DateTime, store_and_fwd_flag UInt8, rate_code_id UInt8, pickup_longitude Float64, pickup_latitude Float64, dropoff_longitude Float64, dropoff_latitude Float64, passenger_count UInt8, trip_distance Float64, fare_amount Float32, extra Float32, mta_tax Float32, tip_amount Float32, tolls_amount Float32, ehail_fee Float32, improvement_surcharge Float32, total_amount Float32, payment_type_ Enum8('UNK' = 0, 'CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4), trip_type UInt8, pickup FixedString(25), dropoff FixedString(25), cab_type Enum8('yellow' = 1, 'green' = 2, 'uber' = 3), pickup_nyct2010_gid UInt8, pickup_ctlabel Float32, pickup_borocode UInt8, pickup_boroname Enum8('' = 0, 'Manhattan' = 1, 'Bronx' = 2, 'Brooklyn' = 3, 'Queens' = 4, 'Staten Island' = 5), pickup_ct2010 FixedString(6), pickup_boroct2010 FixedString(7), pickup_cdeligibil Enum8(' ' = 0, 'E' = 1, 'I' = 2), pickup_ntacode FixedString(4), pickup_ntaname Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 
'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. 
George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195), pickup_puma UInt16, dropoff_nyct2010_gid UInt8, dropoff_ctlabel Float32, dropoff_borocode UInt8, dropoff_boroname Enum8('' = 0, 'Manhattan' = 1, 'Bronx' = 2, 'Brooklyn' = 3, 'Queens' = 4, 'Staten Island' = 5), dropoff_ct2010 FixedString(6), dropoff_boroct2010 FixedString(7), dropoff_cdeligibil Enum8(' ' = 0, 'E' = 1, 'I' = 2), dropoff_ntacode FixedString(4), dropoff_ntaname Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. 
Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. 
George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195), dropoff_puma UInt16) ENGINE = MergeTree(pickup_date, pickup_datetime, 8192); ``` On the source server: ``` sql -CREATE TABLE trips_mergetree_x3 AS trips_mergetree_third ENGINE = Distributed(perftest, default, trips_mergetree_third, rand()) +CREATE TABLE trips_mergetree_x3 AS trips_mergetree_third ENGINE = Distributed(perftest, default, trips_mergetree_third, rand()); ``` The following query redistributes data: ``` sql -INSERT INTO trips_mergetree_x3 SELECT * FROM trips_mergetree +INSERT INTO trips_mergetree_x3 SELECT * FROM trips_mergetree; ``` This takes 2454 seconds. diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index ae2089f6365..db2e773a685 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1981,6 +1981,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t - [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. - [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`. - [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`. +- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`. ## Arrow {#data-format-arrow} @@ -2051,6 +2052,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. - [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. - [output_format_arrow_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_fixed_string_as_fixed_byte_array) - use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString columns. Default value - `true`. +- [output_format_arrow_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_compression_method) - compression method used in output Arrow format. Default value - `none`. 
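As a rough illustration of the compression settings documented above (a minimal sketch, assuming the `output_format_parquet_compression_method` and `output_format_arrow_compression_method` settings are available on the server; the `trips` table name is only a placeholder):

```sql
-- Write Parquet output with zstd instead of the default snappy compression.
SET output_format_parquet_compression_method = 'zstd';
SELECT * FROM trips INTO OUTFILE 'trips.parquet' FORMAT Parquet;

-- Arrow output is uncompressed by default; lz4_frame is one of the supported values.
SET output_format_arrow_compression_method = 'lz4_frame';
SELECT * FROM trips INTO OUTFILE 'trips.arrow' FORMAT Arrow;
```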
## ArrowStream {#data-format-arrow-stream} @@ -2107,6 +2109,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename. ### Arrow format settings {#parquet-format-settings} - [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. +- [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`. - [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. - [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 25bdb0c36a3..e028b4a6d96 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -117,7 +117,7 @@ clickhouse-local --file='hobbies.jsonl' --table='hobbies' --query='SELECT * FROM 4 47 Brayan ['movies','skydiving'] ``` -# Using structure from insertion table {#using-structure-from-insertion-table} +## Using structure from insertion table {#using-structure-from-insertion-table} When table functions `file/s3/url/hdfs` are used to insert data into a table, there is an option to use the structure from the insertion table instead of extracting it from the data. @@ -222,7 +222,7 @@ INSERT INTO hobbies4 SELECT id, empty(hobbies) ? NULL : hobbies[1] FROM file(hob In this case, there are some operations performed on the column `hobbies` in the `SELECT` query to insert it into the table, so ClickHouse cannot use the structure from the insertion table, and schema inference will be used. -# Schema inference cache {#schema-inference-cache} +## Schema inference cache {#schema-inference-cache} For most input formats schema inference reads some data to determine its structure and this process can take some time. To prevent inferring the same schema every time ClickHouse read the data from the same file, the inferred schema is cached and when accessing the same file again, ClickHouse will use the schema from the cache. @@ -326,14 +326,14 @@ SELECT count() FROM system.schema_inference_cache WHERE storage='S3' └─────────┘ ``` -# Text formats {#text-formats} +## Text formats {#text-formats} For text formats, ClickHouse reads the data row by row, extracts column values according to the format, and then uses some recursive parsers and heuristics to determine the type for each value. The maximum number of rows read from the data in schema inference is controlled by the setting `input_format_max_rows_to_read_for_schema_inference` with default value 25000. By default, all inferred types are [Nullable](../sql-reference/data-types/nullable.md), but you can change this by setting `schema_inference_make_columns_nullable` (see examples in the [settings](#settings-for-text-formats) section). 
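For illustration, a minimal sketch of combining the schema inference settings mentioned above in a single `DESC` query; the inline JSON sample is invented for the example:

```sql
-- Sketch: cap the inference sample and keep columns non-Nullable unless NULLs are actually seen
DESC format(JSONEachRow, '{"id" : 1, "name" : "Alice"}')
SETTINGS input_format_max_rows_to_read_for_schema_inference = 100,
         schema_inference_make_columns_nullable = 0;
```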
-## JSON formats {#json-formats} +### JSON formats {#json-formats} In JSON formats ClickHouse parses values according to the JSON specification and then tries to find the most appropriate data type for them. @@ -464,9 +464,9 @@ most likely this column contains only Nulls or empty Arrays/Maps. ... ``` -### JSON settings {#json-settings} +#### JSON settings {#json-settings} -#### input_format_json_read_objects_as_strings +##### input_format_json_read_objects_as_strings Enabling this setting allows reading nested JSON objects as strings. This setting can be used to read nested JSON objects without using JSON object type. @@ -486,7 +486,7 @@ DESC format(JSONEachRow, $$ └──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -#### input_format_json_try_infer_numbers_from_strings +##### input_format_json_try_infer_numbers_from_strings Enabling this setting allows inferring numbers from string values. @@ -507,7 +507,7 @@ DESC format(JSONEachRow, $$ └───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -#### input_format_json_read_numbers_as_strings +##### input_format_json_read_numbers_as_strings Enabling this setting allows reading numeric values as strings. @@ -528,7 +528,7 @@ DESC format(JSONEachRow, $$ └───────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -#### input_format_json_read_bools_as_numbers +##### input_format_json_read_bools_as_numbers Enabling this setting allows reading Bool values as numbers. @@ -549,7 +549,7 @@ DESC format(JSONEachRow, $$ └───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## CSV {#csv} +### CSV {#csv} In CSV format ClickHouse extracts column values from the row according to delimiters. ClickHouse expects all types except numbers and strings to be enclosed in double quotes. If the value is in double quotes, ClickHouse tries to parse the data inside quotes using the recursive parser and then tries to find the most appropriate data type for it. If the value is not in double quotes, ClickHouse tries to parse it as a number, @@ -726,7 +726,7 @@ $$) └──────────────┴───────────────┘ ``` -## TSV/TSKV {#tsv-tskv} +### TSV/TSKV {#tsv-tskv} In TSV/TSKV formats ClickHouse extracts column value from the row according to tabular delimiters and then parses extracted value using the recursive parser to determine the most appropriate type. If the type cannot be determined, ClickHouse treats this value as String. @@ -1019,7 +1019,7 @@ DESC format(TSV, '[1,2,3] 42.42 Hello World!') └──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## CustomSeparated {#custom-separated} +### CustomSeparated {#custom-separated} In CustomSeparated format ClickHouse first extracts all column values from the row according to specified delimiters and then tries to infer the data type for each value according to escaping rule. @@ -1080,7 +1080,7 @@ $$) └────────┴───────────────┴────────────┘ ``` -## Template {#template} +### Template {#template} In Template format ClickHouse first extracts all column values from the row according to the specified template and then tries to infer the data type for each value according to its escaping rule. 
@@ -1120,7 +1120,7 @@ $$) └──────────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## Regexp {#regexp} +### Regexp {#regexp} Similar to Template, in Regexp format ClickHouse first extracts all column values from the row according to specified regular expression and then tries to infer data type for each value according to the specified escaping rule. @@ -1142,9 +1142,9 @@ Line: value_1=2, value_2="Some string 2", value_3="[4, 5, NULL]"$$) └──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## Settings for text formats {settings-for-text-formats} +### Settings for text formats {#settings-for-text-formats} -### input_format_max_rows_to_read_for_schema_inference +#### input_format_max_rows_to_read_for_schema_inference This setting controls the maximum number of rows to be read while schema inference. The more rows are read, the more time is spent on schema inference, but the greater the chance to @@ -1152,7 +1152,7 @@ correctly determine the types (especially when the data contains a lot of nulls) Default value: `25000`. -### column_names_for_schema_inference +#### column_names_for_schema_inference The list of column names to use in schema inference for formats without explicit column names. Specified names will be used instead of default `c1,c2,c3,...`. The format: `column1,column2,column3,...`. @@ -1169,7 +1169,7 @@ DESC format(TSV, 'Hello, World! 42 [1, 2, 3]') settings column_names_for_schema_ └──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -### schema_inference_hints +#### schema_inference_hints The list of column names and types to use in schema inference instead of automatically determined types. The format: 'column_name1 column_type1, column_name2 column_type2, ...'. This setting can be used to specify the types of columns that could not be determined automatically or for optimizing the schema. @@ -1189,7 +1189,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul └─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -### schema_inference_make_columns_nullable +#### schema_inference_make_columns_nullable Controls making inferred types `Nullable` in schema inference for formats without information about nullability. If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference. @@ -1232,7 +1232,7 @@ DESC format(JSONEachRow, $$ └─────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -### input_format_try_infer_integers +#### input_format_try_infer_integers If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from sample data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`. @@ -1289,7 +1289,7 @@ DESC format(JSONEachRow, $$ └────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -### input_format_try_infer_datetimes +#### input_format_try_infer_datetimes If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. 
If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime64(9)`, @@ -1337,7 +1337,7 @@ DESC format(JSONEachRow, $$ Note: Parsing datetimes during schema inference respect setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format) -### input_format_try_infer_dates +#### input_format_try_infer_dates If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. If all fields from a column in sample data were successfully parsed as dates, the result type will be `Date`, @@ -1383,14 +1383,14 @@ DESC format(JSONEachRow, $$ └──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -# Self describing formats {#self-describing-formats} +## Self describing formats {#self-describing-formats} Self-describing formats contain information about the structure of the data in the data itself, it can be some header with a description, a binary type tree, or some kind of table. To automatically infer a schema from files in such formats, ClickHouse reads a part of the data containing information about the types and converts it into a schema of the ClickHouse table. -## Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types} +### Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types} ClickHouse supports some text formats with the suffix -WithNamesAndTypes. This suffix means that the data contains two additional rows with column names and types before the actual data. While schema inference for such formats, ClickHouse reads the first two rows and extracts column names and types. @@ -1412,7 +1412,7 @@ $$) └──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## JSON formats with metadata {#json-with-metadata} +### JSON formats with metadata {#json-with-metadata} Some JSON input formats ([JSON](formats.md#json), [JSONCompact](formats.md#json-compact), [JSONColumnsWithMetadata](formats.md#jsoncolumnswithmetadata)) contain metadata with column names and types. In schema inference for such formats, ClickHouse reads this metadata. @@ -1465,7 +1465,7 @@ $$) └──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## Avro {#avro} +### Avro {#avro} In Avro format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: @@ -1485,7 +1485,7 @@ In Avro format ClickHouse reads its schema from the data and converts it to Clic Other Avro types are not supported. -## Parquet {#parquet} +### Parquet {#parquet} In Parquet format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: @@ -1513,7 +1513,7 @@ In Parquet format ClickHouse reads its schema from the data and converts it to C Other Parquet types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. -## Arrow {#arrow} +### Arrow {#arrow} In Arrow format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: @@ -1541,7 +1541,7 @@ In Arrow format ClickHouse reads its schema from the data and converts it to Cli Other Arrow types are not supported. 
By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. -## ORC {#orc} +### ORC {#orc} In ORC format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches: @@ -1564,17 +1564,17 @@ In ORC format ClickHouse reads its schema from the data and converts it to Click Other ORC types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. -## Native {#native} +### Native {#native} Native format is used inside ClickHouse and contains the schema in the data. In schema inference, ClickHouse reads the schema from the data without any transformations. -# Formats with external schema {#formats-with-external-schema} +## Formats with external schema {#formats-with-external-schema} Such formats require a schema describing the data in a separate file in a specific schema language. To automatically infer a schema from files in such formats, ClickHouse reads external schema from a separate file and transforms it to a ClickHouse table schema. -# Protobuf {#protobuf} +### Protobuf {#protobuf} In schema inference for Protobuf format ClickHouse uses the following type matches: @@ -1592,7 +1592,7 @@ In schema inference for Protobuf format ClickHouse uses the following type match | `repeated T` | [Array(T)](../sql-reference/data-types/array.md) | | `message`, `group` | [Tuple](../sql-reference/data-types/tuple.md) | -# CapnProto {#capnproto} +### CapnProto {#capnproto} In schema inference for CapnProto format ClickHouse uses the following type matches: @@ -1615,13 +1615,13 @@ In schema inference for CapnProto format ClickHouse uses the following type matc | `struct` | [Tuple](../sql-reference/data-types/tuple.md) | | `union(T, Void)`, `union(Void, T)` | [Nullable(T)](../sql-reference/data-types/nullable.md) | -# Strong-typed binary formats {#strong-typed-binary-formats} +## Strong-typed binary formats {#strong-typed-binary-formats} In such formats, each serialized value contains information about its type (and possibly about its name), but there is no information about the whole table. In schema inference for such formats, ClickHouse reads data row by row (up to `input_format_max_rows_to_read_for_schema_inference` rows) and extracts the type (and possibly name) for each value from the data and then converts these types to ClickHouse types. -## MsgPack {msgpack} +### MsgPack {#msgpack} In MsgPack format there is no delimiter between rows, to use schema inference for this format you should specify the number of columns in the table using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the following type matches: @@ -1641,7 +1641,7 @@ using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. -## BSONEachRow {#bsoneachrow} +### BSONEachRow {#bsoneachrow} In BSONEachRow each row of data is presented as a BSON document. 
In schema inference ClickHouse reads BSON documents one by one and extracts values, names, and types from the data and then transforms these types to ClickHouse types using the following type matches: @@ -1661,11 +1661,11 @@ values, names, and types from the data and then transforms these types to ClickH By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. -# Formats with constant schema {#formats-with-constant-schema} +## Formats with constant schema {#formats-with-constant-schema} Data in such formats always have the same schema. -## LineAsString {#line-as-string} +### LineAsString {#line-as-string} In this format, ClickHouse reads the whole line from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `line`. @@ -1680,7 +1680,7 @@ DESC format(LineAsString, 'Hello\nworld!') └──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## JSONAsString {#json-as-string} +### JSONAsString {#json-as-string} In this format, ClickHouse reads the whole JSON object from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `json`. @@ -1695,7 +1695,7 @@ DESC format(JSONAsString, '{"x" : 42, "y" : "Hello, World!"}') └──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## JSONAsObject {#json-as-object} +### JSONAsObject {#json-as-object} In this format, ClickHouse reads the whole JSON object from the data into a single column with `Object('json')` data type. Inferred type for this format is always `String` and the column name is `json`. diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 3fe815bc79a..99daddeeb99 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1318,12 +1318,12 @@ Settings: ``` xml - /metrics - 8001 - true - true - true - + /metrics + 9363 + true + true + true + ``` ## query_log {#server_configuration_parameters-query-log} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 3580d83f704..172627c7c3e 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1014,6 +1014,12 @@ Use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString column Enabled by default. +### output_format_arrow_compression_method {#output_format_arrow_compression_method} + +Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed) + +Default value: `none`. + ## ORC format settings {#orc-format-settings} ### input_format_orc_import_nested {#input_format_orc_import_nested} @@ -1057,6 +1063,12 @@ Use ORC String type instead of Binary for String columns. Disabled by default. +### output_format_orc_compression_method {#output_format_orc_compression_method} + +Compression method used in output ORC format. Supported codecs: `lz4`, `snappy`, `zlib`, `zstd`, `none` (uncompressed) + +Default value: `none`. + ## Parquet format settings {#parquet-format-settings} ### input_format_parquet_import_nested {#input_format_parquet_import_nested} @@ -1112,6 +1124,12 @@ The version of Parquet format used in output format. 
Supported versions: `1.0`, Default value: `2.latest`. +### output_format_parquet_compression_method {#output_format_parquet_compression_method} + +Compression method used in output Parquet format. Supported codecs: `snappy`, `lz4`, `brotli`, `zstd`, `gzip`, `none` (uncompressed) + +Default value: `snappy`. + ## Hive format settings {#hive-format-settings} ### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter} @@ -1474,7 +1492,7 @@ Default value: `65505`. The name of table that will be used in the output INSERT statement. -Default value: `'table''`. +Default value: `table`. ### output_format_sql_insert_include_column_names {#output_format_sql_insert_include_column_names} @@ -1514,4 +1532,12 @@ Disabled by default. The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit. -Default value: `1GiB` +Default value: `1GiB`. + +## Native format settings {#native-format-settings} + +### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion} + +Allow types conversion in Native input format between columns from input data and requested columns. + +Enabled by default. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 94dcf159ca9..37c6841225b 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1548,7 +1548,7 @@ Enables or disables asynchronous inserts. This makes sense only for insertion ov If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables. -The data is inserted either after the [async_insert_max_data_size](#async-insert-max-data-size) is exceeded or after [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds since the first `INSERT` query. If the [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted after `async_insert_stale_timeout_ms` milliseconds since the last query. +The data is inserted either after the [async_insert_max_data_size](#async-insert-max-data-size) is exceeded or after [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds since the first `INSERT` query. If the [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted after `async_insert_stale_timeout_ms` milliseconds since the last query. Also the buffer will be flushed to disk if at least [async_insert_max_query_number](#async-insert-max-query-number) async insert queries per block were received. This last setting takes effect only if [async_insert_deduplicate](#async-insert-deduplicate) is enabled. If [wait_for_async_insert](#wait-for-async-insert) is enabled, every client will wait for the data to be processed and flushed to the table. Otherwise, the query would be processed almost instantly, even if the data is not inserted. diff --git a/docs/en/operations/system-tables/opentelemetry_span_log.md b/docs/en/operations/system-tables/opentelemetry_span_log.md index 9d8aea46218..7d7d1ef1b04 100644 --- a/docs/en/operations/system-tables/opentelemetry_span_log.md +++ b/docs/en/operations/system-tables/opentelemetry_span_log.md @@ -15,6 +15,13 @@ Columns: - `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation. 
+- `kind` ([Enum8](../../sql-reference/data-types/enum.md)) — The [SpanKind](https://opentelemetry.io/docs/reference/specification/trace/api/#spankind) of the span. + - `INTERNAL` — Indicates that the span represents an internal operation within an application. + - `SERVER` — Indicates that the span covers server-side handling of a synchronous RPC or other remote request. + - `CLIENT` — Indicates that the span describes a request to some remote service. + - `PRODUCER` — Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts. + - `CONSUMER` - Indicates that the span describes a child of an asynchronous PRODUCER request. + - `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds). - `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds). @@ -42,6 +49,7 @@ trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914 span_id: 701487461015578150 parent_span_id: 2991972114672045096 operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl() +kind: INTERNAL start_time_us: 1612374594529090 finish_time_us: 1612374594529108 finish_date: 2021-02-03 diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index a4fa5579638..79b8bc90634 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -14,10 +14,6 @@ Accepts data that represent tables and queries them using [ClickHouse SQL dialec By default `clickhouse-local` does not have access to data on the same host, but it supports loading server configuration using `--config-file` argument. -:::warning -It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error. -::: - For temporary data, a unique temporary data directory is created by default. ## Usage {#usage} diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md index 2587bc5533f..5546ade1758 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md @@ -11,15 +11,15 @@ sidebar_title: exponentialMovingAverage **Syntax** ```sql -exponentialMovingAverage(x)(value, timestamp) +exponentialMovingAverage(x)(value, timeunit) ``` -Each `value` corresponds to the determinate `timestamp`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value is considered to be. +Each `value` corresponds to the determinate `timeunit`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value is considered to be. **Arguments** - `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). -- `timestamp` — Timestamp. 
[Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). Timeunit is not timestamp (seconds), it's -- an index of the time interval. Can be calculated using [intDiv](../../functions/arithmetic-functions/#intdiva-b). **Parameters** @@ -148,3 +148,58 @@ Result: │ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎│ └───────┴──────┴──────────────────────┴────────────────────────────────────────────┘ ``` + +```sql +CREATE TABLE data +ENGINE = Memory AS +SELECT + 10 AS value, + toDateTime('2020-01-01') + (3600 * number) AS time +FROM numbers_mt(10); + + +-- Calculate timeunit using intDiv +SELECT + value, + time, + exponentialMovingAverage(1)(value, intDiv(toUInt32(time), 3600)) OVER (ORDER BY time ASC) AS res, + intDiv(toUInt32(time), 3600) AS timeunit +FROM data +ORDER BY time ASC; + +┌─value─┬────────────────time─┬─────────res─┬─timeunit─┐ +│ 10 │ 2020-01-01 00:00:00 │ 5 │ 438288 │ +│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ 438289 │ +│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ 438290 │ +│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ 438291 │ +│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ 438292 │ +│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ 438293 │ +│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ 438294 │ +│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ 438295 │ +│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ 438296 │ +│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ 438297 │ +└───────┴─────────────────────┴─────────────┴──────────┘ + + +-- Calculate timeunit using toRelativeHourNum +SELECT + value, + time, + exponentialMovingAverage(1)(value, toRelativeHourNum(time)) OVER (ORDER BY time ASC) AS res, + toRelativeHourNum(time) AS timeunit +FROM data +ORDER BY time ASC; + +┌─value─┬────────────────time─┬─────────res─┬─timeunit─┐ +│ 10 │ 2020-01-01 00:00:00 │ 5 │ 438288 │ +│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ 438289 │ +│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ 438290 │ +│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ 438291 │ +│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ 438292 │ +│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ 438293 │ +│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ 438294 │ +│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ 438295 │ +│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ 438296 │ +│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ 438297 │ +└───────┴─────────────────────┴─────────────┴──────────┘ +``` diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 50e74920e4b..ed35df9b97a 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -112,23 +112,21 @@ See also [data_type_default_nullable](../../../operations/settings/settings.md#d ## Default Values {#default_values} -The column description can specify an expression for a default value, in one of the following ways: `DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`. +The column description can specify a default value expression in the form of `DEFAULT expr`, `MATERIALIZED expr`, or `ALIAS expr`. Example: `URLDomain String DEFAULT domain(URL)`. -Example: `URLDomain String DEFAULT domain(URL)`. +The expression `expr` is optional. 
If it is omitted, the column type must be specified explicitly and the default value will be `0` for numeric columns, `''` (the empty string) for string columns, `[]` (the empty array) for array columns, `1970-01-01` for date columns, or `NULL` for nullable columns. -If an expression for the default value is not defined, the default values will be set to zeros for numbers, empty strings for strings, empty arrays for arrays, and `1970-01-01` for dates or zero unix timestamp for DateTime, NULL for Nullable. +The column type of a default value column can be omitted in which case it is inferred from `expr`'s type. For example, the type of column `EventDate DEFAULT toDate(EventTime)` will be `Date`. -If the default expression is defined, the column type is optional. If there isn’t an explicitly defined type, the default expression type is used. Example: `EventDate DEFAULT toDate(EventTime)` – the ‘Date’ type will be used for the ‘EventDate’ column. +If both a data type and a default value expression are specified, an implicit type casting function is inserted which converts the expression to the specified type. Example: `Hits UInt32 DEFAULT 0` is internally represented as `Hits UInt32 DEFAULT toUInt32(0)`. -If the data type and default expression are defined explicitly, this expression will be cast to the specified type using type casting functions. Example: `Hits UInt32 DEFAULT 0` means the same thing as `Hits UInt32 DEFAULT toUInt32(0)`. - -Default expressions may be defined as an arbitrary expression from table constants and columns. When creating and changing the table structure, it checks that expressions do not contain loops. For INSERT, it checks that expressions are resolvable – that all columns they can be calculated from have been passed. +A default value expression `expr` may reference arbitrary table columns and constants. ClickHouse checks that changes of the table structure do not introduce loops in the expression calculation. For INSERT, it checks that expressions are resolvable – that all columns they can be calculated from have been passed. ### DEFAULT `DEFAULT expr` -Normal default value. If the INSERT query does not specify the corresponding column, it will be filled in by computing the corresponding expression. +Normal default value. If the value of such a column is not specified in an INSERT query, it is computed from `expr`. Example: @@ -154,9 +152,9 @@ SELECT * FROM test; `MATERIALIZED expr` -Materialized expression. Such a column can’t be specified for INSERT, because it is always calculated. -For an INSERT without a list of columns, these columns are not considered. -In addition, this column is not substituted when using an asterisk in a SELECT query. This is to preserve the invariant that the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns. +Materialized expression. Values of such columns are always calculated; they cannot be specified in INSERT queries. + +Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with the setting `asterisk_include_materialized_columns`. Example: @@ -192,8 +190,9 @@ SELECT * FROM test SETTINGS asterisk_include_materialized_columns=1; `EPHEMERAL [expr]` -Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement.
If `expr` is omitted type for column is required. -INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns. +Ephemeral column. Columns of this type are not stored in the table and it is not possible to SELECT from them. The only purpose of ephemeral columns is to build default value expressions of other columns from them. + +An insert without explicitly specified columns will skip columns of this type. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. Example: @@ -205,7 +204,7 @@ CREATE OR REPLACE TABLE test hexed FixedString(4) DEFAULT unhex(unhexed) ) ENGINE = MergeTree -ORDER BY id +ORDER BY id; INSERT INTO test (id, unhexed) Values (1, '5a90b714'); @@ -227,9 +226,9 @@ hex(hexed): 5A90B714 `ALIAS expr` -Synonym. Such a column isn’t stored in the table at all. -Its values can’t be inserted in a table, and it is not substituted when using an asterisk in a SELECT query. -It can be used in SELECTs if the alias is expanded during query parsing. +Calculated columns (synonym). Columns of this type are not stored in the table and it is not possible to INSERT values into them. + +When SELECT queries explicitly reference columns of this type, the value is computed at query time from `expr`. By default, `SELECT *` excludes ALIAS columns. This behavior can be disabled with the setting `asterisk_include_alias_columns`. When using the ALTER query to add new columns, old data for these columns is not written. Instead, when reading old data that does not have values for the new columns, expressions are computed on the fly by default. However, if running the expressions requires different columns that are not indicated in the query, these columns will additionally be read, but only for the blocks of data that need it. diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index acdede3c673..0def42259ab 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -70,6 +70,12 @@ A materialized view is implemented as follows: when inserting data to the table Materialized views in ClickHouse use **column names** instead of column order during insertion into destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using Materialized views. Materialized views in ClickHouse are implemented more like insert triggers. If there’s some aggregation in the view query, it’s applied only to the batch of freshly inserted data. Any changes to existing data of source table (like update, delete, drop partition, etc.) does not change the materialized view. + +Materialized views in ClickHouse do not have deterministic behaviour in case of errors. This means that blocks that have already been written will be preserved in the destination table, but all blocks after the error will not be. + +By default, if pushing to one of the views fails, the INSERT query will fail too, and some blocks may not be written to the destination table.
This can be changed with the `materialized_views_ignore_errors` setting (set it for the `INSERT` query): if `materialized_views_ignore_errors=true`, any errors while pushing to views are ignored and all blocks will be written to the destination table. + +Also note that `materialized_views_ignore_errors` is set to `true` by default for `system.*_log` tables. ::: If you specify `POPULATE`, the existing table data is inserted into the view when creating it, as if making a `CREATE TABLE ... AS SELECT ...` . Otherwise, the query contains only the data inserted in the table after creating the view. We **do not recommend** using `POPULATE`, since data inserted in the table during the view creation will not be inserted in it. diff --git a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md index dac490468d0..e3b4238a200 100644 --- a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -89,7 +89,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] └─────────────────────┴───────────┴──────────┴──────┘ ``` -Первая строка отменяет предыдущее состояние объекта (пользователя). Она должен повторять все поля из ключа сортировки для отменённого состояния за исключением `Sign`. +Первая строка отменяет предыдущее состояние объекта (пользователя). Она должна повторять все поля из ключа сортировки для отменённого состояния за исключением `Sign`. Вторая строка содержит текущее состояние. diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index 7269cc023e4..24e0f8dbbb8 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -584,7 +584,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); Данные с истекшим `TTL` удаляются, когда ClickHouse мёржит куски данных. -Когда ClickHouse видит, что некоторые данные устарели, он выполняет внеплановые мёржи. Для управление частотой подобных мёржей, можно задать настройку `merge_with_ttl_timeout`. Если её значение слишком низкое, придется выполнять много внеплановых мёржей, которые могут начать потреблять значительную долю ресурсов сервера. +Когда ClickHouse видит, что некоторые данные устарели, он выполняет внеплановые мёржи. Для управления частотой подобных мёржей, можно задать настройку `merge_with_ttl_timeout`. Если её значение слишком низкое, придется выполнять много внеплановых мёржей, которые могут начать потреблять значительную долю ресурсов сервера. Если вы выполните запрос `SELECT` между слияниями вы можете получить устаревшие данные. Чтобы избежать этого используйте запрос [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) перед `SELECT`. @@ -679,7 +679,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); - `policy_name_N` — название политики. Названия политик должны быть уникальны. - `volume_name_N` — название тома. Названия томов должны быть уникальны. - `disk` — диск, находящийся внутри тома. -- `max_data_part_size_bytes` — максимальный размер куска данных, который может находится на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том.
В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том. +- `max_data_part_size_bytes` — максимальный размер куска данных, который может находиться на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том. - `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). Для перемещения куски сортируются по размеру от большего к меньшему (по убыванию) и выбираются куски, совокупный размер которых достаточен для соблюдения условия `move_factor`, если совокупный размер всех партов недостаточен, будут перемещены все парты. - `prefer_not_to_merge` — Отключает слияние кусков данных, хранящихся на данном томе. Если данная настройка включена, то слияние данных, хранящихся на данном томе, не допускается. Это позволяет контролировать работу ClickHouse с медленными дисками. @@ -730,7 +730,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); В приведенном примере, политика `hdd_in_order` реализует прицип [round-robin](https://ru.wikipedia.org/wiki/Round-robin_(%D0%B0%D0%BB%D0%B3%D0%BE%D1%80%D0%B8%D1%82%D0%BC)). Так как в политике есть всего один том (`single`), то все записи производятся на его диски по круговому циклу. Такая политика может быть полезна при наличии в системе нескольких похожих дисков, но при этом не сконфигурирован RAID. Учтите, что каждый отдельный диск ненадёжен и чтобы не потерять важные данные это необходимо скомпенсировать за счет хранения данных в трёх копиях. -Если система содержит диски различных типов, то может пригодиться политика `moving_from_ssd_to_hdd`. В томе `hot` находится один SSD-диск (`fast_ssd`), а также задается ограничение на максимальный размер куска, который может храниться на этом томе (1GB). Все куски такой таблицы больше 1GB будут записываться сразу на том `cold`, в котором содержится один HDD-диск `disk1`. Также, при заполнении диска `fast_ssd` более чем на 80% данные будут переносится на диск `disk1` фоновым процессом. +Если система содержит диски различных типов, то может пригодиться политика `moving_from_ssd_to_hdd`. В томе `hot` находится один SSD-диск (`fast_ssd`), а также задается ограничение на максимальный размер куска, который может храниться на этом томе (1GB). Все куски такой таблицы больше 1GB будут записываться сразу на том `cold`, в котором содержится один HDD-диск `disk1`. Также при заполнении диска `fast_ssd` более чем на 80% данные будут переноситься на диск `disk1` фоновым процессом. Порядок томов в политиках хранения важен, при достижении условий на переполнение тома данные переносятся на следующий. Порядок дисков в томах так же важен, данные пишутся по очереди на каждый из них. 
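For illustration, a minimal sketch of attaching one of the storage policies discussed above to a table; the table name and columns are placeholders, not part of the original documentation:

```sql
-- Sketch: assign the moving_from_ssd_to_hdd policy from the example configuration above
CREATE TABLE example_table
(
    EventDate Date,
    Value UInt64
)
ENGINE = MergeTree
ORDER BY EventDate
SETTINGS storage_policy = 'moving_from_ssd_to_hdd';
```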
diff --git a/docs/ru/interfaces/third-party/client-libraries.md b/docs/ru/interfaces/third-party/client-libraries.md index f55bbe2a47d..a4659e9ac4e 100644 --- a/docs/ru/interfaces/third-party/client-libraries.md +++ b/docs/ru/interfaces/third-party/client-libraries.md @@ -8,6 +8,7 @@ sidebar_label: "Клиентские библиотеки от сторонни :::danger "Disclaimer" Яндекс не поддерживает перечисленные ниже библиотеки и не проводит тщательного тестирования для проверки их качества. +::: - Python: - [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) diff --git a/docs/ru/interfaces/third-party/gui.md b/docs/ru/interfaces/third-party/gui.md index c34b799347b..e7190362dc4 100644 --- a/docs/ru/interfaces/third-party/gui.md +++ b/docs/ru/interfaces/third-party/gui.md @@ -177,19 +177,20 @@ sidebar_label: "Визуальные интерфейсы от сторонни ### Yandex DataLens {#yandex-datalens} -[Yandex DataLens](https://cloud.yandex.ru/services/datalens) — cервис визуализации и анализа данных. +[Yandex DataLens](https://datalens.yandex.ru) — cервис визуализации и анализа данных. Основные возможности: - Широкий выбор инструментов визуализации, от простых столбчатых диаграмм до сложных дашбордов. - Возможность опубликовать дашборды на широкую аудиторию. - Поддержка множества источников данных, включая ClickHouse. -- Хранение материализованных данных в кластере ClickHouse DataLens. -Для небольших проектов DataLens [доступен бесплатно](https://cloud.yandex.ru/docs/datalens/pricing), в том числе и для коммерческого использования. +DataLens [доступен бесплатно](https://cloud.yandex.ru/docs/datalens/pricing), в том числе и для коммерческого использования. +- [Знакомство с DataLens]((https://youtu.be/57ngi_6BINE). +- [Чат сообщества DataLens](https://t.me/YandexDataLens) - [Документация DataLens](https://cloud.yandex.ru/docs/datalens/). -- [Пособие по визуализации данных из ClickHouse](https://cloud.yandex.ru/docs/solutions/datalens/data-from-ch-visualization). +- [Сценарий по визуализации данных из ClickHouse](https://cloud.yandex.ru/docs/solutions/datalens/data-from-ch-visualization). ### Holistics Software {#holistics-software} diff --git a/docs/ru/operations/clickhouse-keeper.md b/docs/ru/operations/clickhouse-keeper.md index 67be83e13b2..3a931529b32 100644 --- a/docs/ru/operations/clickhouse-keeper.md +++ b/docs/ru/operations/clickhouse-keeper.md @@ -325,21 +325,21 @@ clickhouse-keeper-converter --zookeeper-logs-dir /var/lib/zookeeper/version-2 -- Например, для кластера из 3 нод, алгоритм кворума продолжает работать при отказе не более чем одной ноды. Конфигурация кластера может быть изменена динамически с некоторыми ограничениями. -Переконфигурация также использует Raft, поэтому для добавление новой ноды кластера или исключения старой ноды из него требуется достижения кворума в рамках текущей конфигурации кластера. +Переконфигурация также использует Raft, поэтому для добавления новой ноды кластера или исключения старой ноды требуется достижение кворума в рамках текущей конфигурации кластера. Если в вашем кластере произошел отказ большего числа нод, чем допускает Raft для вашей текущей конфигурации и у вас нет возможности восстановить их работоспособность, Raft перестанет работать и не позволит изменить конфигурацию стандартным механизмом. -Тем не менее ClickHousr Keeper имеет возможность запуститься в режиме восстановления, который позволяет переконфигурировать класте используя только одну ноду кластера. 
+Тем не менее ClickHouse Keeper имеет возможность запуститься в режиме восстановления, который позволяет переконфигурировать кластер используя только одну ноду кластера. Этот механизм может использоваться только как крайняя мера, когда вы не можете восстановить существующие ноды кластера или запустить новый сервер с тем же идентификатором. Важно: - Удостоверьтесь, что отказавшие ноды не смогут в дальнейшем подключиться к кластеру в будущем. -- Не запускайте новые ноды, пока не завешите процедуру ниже. +- Не запускайте новые ноды, пока не завершите процедуру ниже. После того, как выполнили действия выше выполните следующие шаги. -1. Выберете одну ноду Keeper, которая станет новым лидером. Учтите, что данные которые с этой ноды будут испольщзованы всем кластером, поэтому рекомендуется выбрать ноду с наиболее актуальным состоянием. +1. Выберете одну ноду Keeper, которая станет новым лидером. Учтите, что данные с этой ноды будут использованы всем кластером, поэтому рекомендуется выбрать ноду с наиболее актуальным состоянием. 2. Перед дальнейшими действиям сделайте резервную копию данных из директорий `log_storage_path` и `snapshot_storage_path`. 3. Измените настройки на всех нодах кластера, которые вы собираетесь использовать. -4. Отправьте команду `rcvr` на ноду, которую вы выбрали или остановите ее и запустите заново с аргументом `--force-recovery`. Это переведет ноду в режим восстановления. +4. Отправьте команду `rcvr` на ноду, которую вы выбрали, или остановите ее и запустите заново с аргументом `--force-recovery`. Это переведет ноду в режим восстановления. 5. Запускайте остальные ноды кластера по одной и проверяйте, что команда `mntr` возвращает `follower` в выводе состояния `zk_server_state` перед тем, как запустить следующую ноду. -6. Пока нода работает в режиме восстановления, лидер будет возвращать ошибку на запрос `mntr` пока кворум не будет достигнут с помощью новых нод. Любые запросы от клиентов и постедователей будут возвращать ошибку. +6. Пока нода работает в режиме восстановления, лидер будет возвращать ошибку на запрос `mntr` пока кворум не будет достигнут с помощью новых нод. Любые запросы от клиентов и последователей будут возвращать ошибку. 7. После достижения кворума лидер перейдет в нормальный режим работы и станет обрабатывать все запросы через Raft. Удостоверьтесь, что запрос `mntr` возвращает `leader` в выводе состояния `zk_server_state`. diff --git a/docs/ru/operations/opentelemetry.md b/docs/ru/operations/opentelemetry.md index b6c5e89bcc6..4e127e9e0f0 100644 --- a/docs/ru/operations/opentelemetry.md +++ b/docs/ru/operations/opentelemetry.md @@ -10,6 +10,7 @@ ClickHouse поддерживает [OpenTelemetry](https://opentelemetry.io/) :::danger "Предупреждение" Поддержка стандарта экспериментальная и будет со временем меняться. +::: ## Обеспечение поддержки контекста трассировки в ClickHouse diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index bffa3c39a60..e29b9def9d4 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -26,6 +26,7 @@ ClickHouse перезагружает встроенные словари с з :::danger "Внимание" Лучше не использовать, если вы только начали работать с ClickHouse. +::: Общий вид конфигурации: @@ -1064,6 +1065,7 @@ ClickHouse использует потоки из глобального пул :::danger "Обратите внимание" Завершающий слеш обязателен. 
+::: **Пример** @@ -1330,6 +1332,7 @@ TCP порт для защищённого обмена данными с кли :::danger "Обратите внимание" Завершающий слеш обязателен. +::: **Пример** diff --git a/docs/ru/operations/storing-data.md b/docs/ru/operations/storing-data.md index 2f5c9c95ea4..56081c82bc9 100644 --- a/docs/ru/operations/storing-data.md +++ b/docs/ru/operations/storing-data.md @@ -82,7 +82,7 @@ sidebar_label: "Хранение данных на внешних дисках" - `type` — `encrypted`. Иначе зашифрованный диск создан не будет. - `disk` — тип диска для хранения данных. -- `key` — ключ для шифрования и расшифровки. Тип: [Uint64](../sql-reference/data-types/int-uint.md). Вы можете использовать параметр `key_hex` для шифрования в шестнадцатеричной форме. +- `key` — ключ для шифрования и расшифровки. Тип: [UInt64](../sql-reference/data-types/int-uint.md). Вы можете использовать параметр `key_hex` для шифрования в шестнадцатеричной форме. Вы можете указать несколько ключей, используя атрибут `id` (смотрите пример выше). Необязательные параметры: diff --git a/docs/ru/sql-reference/data-types/aggregatefunction.md b/docs/ru/sql-reference/data-types/aggregatefunction.md index 21b452acb1d..e42b467e4af 100644 --- a/docs/ru/sql-reference/data-types/aggregatefunction.md +++ b/docs/ru/sql-reference/data-types/aggregatefunction.md @@ -6,7 +6,7 @@ sidebar_label: AggregateFunction # AggregateFunction {#data-type-aggregatefunction} -Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(…), и быть записано в таблицу обычно посредством [материализованного представления] (../../sql-reference/statements/create/view.md). Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`. +Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(…), и быть записано в таблицу обычно посредством [материализованного представления](../../sql-reference/statements/create/view.md). Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`. `AggregateFunction(name, types_of_arguments…)` — параметрический тип данных. diff --git a/docs/ru/sql-reference/data-types/geo.md b/docs/ru/sql-reference/data-types/geo.md index a7c5f79b0be..24d981195f5 100644 --- a/docs/ru/sql-reference/data-types/geo.md +++ b/docs/ru/sql-reference/data-types/geo.md @@ -10,6 +10,7 @@ ClickHouse поддерживает типы данных для отображ :::danger "Предупреждение" Сейчас использование типов данных для работы с географическими структурами является экспериментальной возможностью. Чтобы использовать эти типы данных, включите настройку `allow_experimental_geo_types = 1`. +::: **См. также** - [Хранение географических структур данных](https://ru.wikipedia.org/wiki/GeoJSON). 
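For illustration, a minimal sketch of enabling the experimental setting mentioned in the warning above and storing a `Point` value; the table name is a placeholder:

```sql
-- Sketch: enable experimental geo types and use the Point type
SET allow_experimental_geo_types = 1;
CREATE TABLE geo_example (p Point) ENGINE = Memory;
INSERT INTO geo_example VALUES ((10.0, 10.0));
SELECT p, toTypeName(p) FROM geo_example;
```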
diff --git a/docs/ru/sql-reference/data-types/special-data-types/interval.md b/docs/ru/sql-reference/data-types/special-data-types/interval.md index 856275ed8f2..109ceee7852 100644 --- a/docs/ru/sql-reference/data-types/special-data-types/interval.md +++ b/docs/ru/sql-reference/data-types/special-data-types/interval.md @@ -10,6 +10,7 @@ sidebar_label: Interval :::danger "Внимание" Нельзя использовать типы данных `Interval` для хранения данных в таблице. +::: Структура: diff --git a/docs/ru/sql-reference/data-types/tuple.md b/docs/ru/sql-reference/data-types/tuple.md index 76370d01c0d..8953134d154 100644 --- a/docs/ru/sql-reference/data-types/tuple.md +++ b/docs/ru/sql-reference/data-types/tuple.md @@ -34,7 +34,7 @@ SELECT tuple(1,'a') AS x, toTypeName(x) ## Особенности работы с типами данных {#osobennosti-raboty-s-tipami-dannykh} -При создании кортежа «на лету» ClickHouse автоматически определяет тип каждого аргументов как минимальный из типов, который может сохранить значение аргумента. Если аргумент — [NULL](../../sql-reference/data-types/tuple.md#null-literal), то тип элемента кортежа — [Nullable](nullable.md). +При создании кортежа «на лету» ClickHouse автоматически определяет тип всех аргументов как минимальный из типов, который может сохранить значение аргумента. Если аргумент — [NULL](../../sql-reference/data-types/tuple.md#null-literal), то тип элемента кортежа — [Nullable](nullable.md). Пример автоматического определения типа данных: diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md index 64637edc4a4..24f29d3bf53 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md @@ -61,7 +61,7 @@ LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1)) - Мультиполигон. Представляет из себя массив полигонов. Каждый полигон задается двумерным массивом точек — первый элемент этого массива задает внешнюю границу полигона, последующие элементы могут задавать дырки, вырезаемые из него. -Точки могут задаваться массивом или кортежем из своих координат. В текущей реализации поддерживается только двумерные точки. +Точки могут задаваться массивом или кортежем из своих координат. В текущей реализации поддерживаются только двумерные точки. Пользователь может [загружать свои собственные данные](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) во всех поддерживаемых ClickHouse форматах. @@ -80,7 +80,7 @@ LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1)) - `POLYGON`. Синоним к `POLYGON_INDEX_CELL`. Запросы к словарю осуществляются с помощью стандартных [функций](../../../sql-reference/functions/ext-dict-functions.md) для работы со внешними словарями. -Важным отличием является то, что здесь ключами будут являются точки, для которых хочется найти содержащий их полигон. +Важным отличием является то, что здесь ключами являются точки, для которых хочется найти содержащий их полигон. 
**Пример** diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 8c01b8295bf..a711287ae8e 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -59,6 +59,7 @@ ClickHouse поддерживает следующие виды ключей: :::danger "Обратите внимание" Ключ не надо дополнительно описывать в атрибутах. +::: ### Числовой ключ {#ext_dict-numeric-key} diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 314fefab5eb..a262a354889 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -14,7 +14,7 @@ ClickHouse: - Периодически обновляет их и динамически подгружает отсутствующие значения. - Позволяет создавать внешние словари с помощью xml-файлов или [DDL-запросов](../../statements/create/dictionary.md#create-dictionary-query). -Конфигурация внешних словарей может находится в одном или нескольких xml-файлах. Путь к конфигурации указывается в параметре [dictionaries_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config). +Конфигурация внешних словарей может находиться в одном или нескольких xml-файлах. Путь к конфигурации указывается в параметре [dictionaries_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config). Словари могут загружаться при старте сервера или при первом использовании, в зависимости от настройки [dictionaries_lazy_load](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load). diff --git a/docs/ru/sql-reference/functions/introspection.md b/docs/ru/sql-reference/functions/introspection.md index 7d04dff6b72..26497ef21d3 100644 --- a/docs/ru/sql-reference/functions/introspection.md +++ b/docs/ru/sql-reference/functions/introspection.md @@ -22,7 +22,7 @@ sidebar_label: "Функции интроспекции" ClickHouse сохраняет отчеты профилировщика в [журнал трассировки](../../operations/system-tables/trace_log.md#system_tables-trace_log) в системной таблице. Убедитесь, что таблица и профилировщик настроены правильно. -## addresssToLine {#addresstoline} +## addressToLine {#addresstoline} Преобразует адрес виртуальной памяти внутри процесса сервера ClickHouse в имя файла и номер строки в исходном коде ClickHouse. diff --git a/docs/ru/sql-reference/operators/exists.md b/docs/ru/sql-reference/operators/exists.md index 3fc085fe021..38855abbcf3 100644 --- a/docs/ru/sql-reference/operators/exists.md +++ b/docs/ru/sql-reference/operators/exists.md @@ -8,7 +8,8 @@ slug: /ru/sql-reference/operators/exists `EXISTS` может быть использован в секции [WHERE](../../sql-reference/statements/select/where.md). :::danger "Предупреждение" - Ссылки на таблицы или столбцы основного запроса не поддерживаются в подзапросе. + Ссылки на таблицы или столбцы основного запроса не поддерживаются в подзапросе. 
+::: **Синтаксис** diff --git a/docs/ru/sql-reference/operators/in.md b/docs/ru/sql-reference/operators/in.md index fa679b890a7..60400fb2b31 100644 --- a/docs/ru/sql-reference/operators/in.md +++ b/docs/ru/sql-reference/operators/in.md @@ -38,9 +38,9 @@ SELECT '1' IN (SELECT 1); └──────────────────────┘ ``` -Если в качестве правой части оператора указано имя таблицы (например, `UserID IN users`), то это эквивалентно подзапросу `UserID IN (SELECT * FROM users)`. Это используется при работе с внешними данными, отправляемым вместе с запросом. Например, вместе с запросом может быть отправлено множество идентификаторов посетителей, загруженное во временную таблицу users, по которому следует выполнить фильтрацию. +Если в качестве правой части оператора указано имя таблицы (например, `UserID IN users`), то это эквивалентно подзапросу `UserID IN (SELECT * FROM users)`. Это используется при работе с внешними данными, отправляемыми вместе с запросом. Например, вместе с запросом может быть отправлено множество идентификаторов посетителей, загруженное во временную таблицу users, по которому следует выполнить фильтрацию. -Если в качестве правой части оператора, указано имя таблицы, имеющий движок Set (подготовленное множество, постоянно находящееся в оперативке), то множество не будет создаваться заново при каждом запросе. +Если в качестве правой части оператора, указано имя таблицы, имеющей движок Set (подготовленное множество, постоянно находящееся в оперативке), то множество не будет создаваться заново при каждом запросе. В подзапросе может быть указано более одного столбца для фильтрации кортежей. Пример: @@ -49,9 +49,9 @@ SELECT '1' IN (SELECT 1); SELECT (CounterID, UserID) IN (SELECT CounterID, UserID FROM ...) FROM ... ``` -Типы столбцов слева и справа оператора IN, должны совпадать. +Типы столбцов слева и справа оператора IN должны совпадать. -Оператор IN и подзапрос могут встречаться в любой части запроса, в том числе в агрегатных и лямбда функциях. +Оператор IN и подзапрос могут встречаться в любой части запроса, в том числе в агрегатных и лямбда-функциях. Пример: ``` sql @@ -122,7 +122,7 @@ FROM t_null Существует два варианта IN-ов с подзапросами (аналогично для JOIN-ов): обычный `IN` / `JOIN` и `GLOBAL IN` / `GLOBAL JOIN`. Они отличаются способом выполнения при распределённой обработке запроса. -:::note "Attention" +:::note "Внимание" Помните, что алгоритмы, описанные ниже, могут работать иначе в зависимости от [настройки](../../operations/settings/settings.md) `distributed_product_mode`. ::: При использовании обычного IN-а, запрос отправляется на удалённые серверы, и на каждом из них выполняются подзапросы в секциях `IN` / `JOIN`. 
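A hedged sketch of the difference between `IN` and `GLOBAL IN` during distributed processing, with placeholder table names (`distributed_table` is assumed to be a Distributed table over `local_table`):

```sql
-- Ordinary IN: the subquery is executed independently on every remote server,
-- so it only sees that server's local data.
SELECT uniq(UserID) FROM distributed_table
WHERE UserID IN (SELECT UserID FROM local_table WHERE CounterID = 34);

-- GLOBAL IN: the subquery runs once on the initiator and its result is shipped
-- to every remote server as a temporary table.
SELECT uniq(UserID) FROM distributed_table
WHERE UserID GLOBAL IN (SELECT UserID FROM distributed_table WHERE CounterID = 34);
```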
@@ -228,7 +228,7 @@ SELECT CounterID, count() FROM distributed_table_1 WHERE UserID IN (SELECT UserI SETTINGS max_parallel_replicas=3 ``` -преобразуются на каждом сервере в +преобразуется на каждом сервере в ```sql SELECT CounterID, count() FROM local_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100) diff --git a/docs/ru/sql-reference/operators/index.md b/docs/ru/sql-reference/operators/index.md index 57c426cb5ad..b5fec3cb38c 100644 --- a/docs/ru/sql-reference/operators/index.md +++ b/docs/ru/sql-reference/operators/index.md @@ -263,6 +263,7 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 6 │ 2014-10-26 00:00:00 │ 2014-10-26 23:00:00 │ 2014-10-27 00:00:00 │ └─────────────────────┴─────────────────────┴─────────────────────┘ ``` +::: **Смотрите также** diff --git a/docs/ru/sql-reference/statements/alter/view.md b/docs/ru/sql-reference/statements/alter/view.md index 2d4823bba3a..e6f6730ff99 100644 --- a/docs/ru/sql-reference/statements/alter/view.md +++ b/docs/ru/sql-reference/statements/alter/view.md @@ -6,7 +6,7 @@ sidebar_label: VIEW # Выражение ALTER TABLE … MODIFY QUERY {#alter-modify-query} -Вы можеие изменить запрос `SELECT`, который был задан при создании [материализованного представления](../create/view.md#materialized), с помощью запроса 'ALTER TABLE … MODIFY QUERY'. Используйте его если при создании материализованного представления не использовалась секция `TO [db.]name`. Настройка `allow_experimental_alter_materialized_view_structure` должна быть включена. +Вы можете изменить запрос `SELECT`, который был задан при создании [материализованного представления](../create/view.md#materialized), с помощью запроса 'ALTER TABLE … MODIFY QUERY'. Используйте его если при создании материализованного представления не использовалась секция `TO [db.]name`. Настройка `allow_experimental_alter_materialized_view_structure` должна быть включена. Если при создании материализованного представления использовалась конструкция `TO [db.]name`, то для изменения отсоедините представление с помощью [DETACH](../detach.md), измените таблицу с помощью [ALTER TABLE](index.md), а затем снова присоедините запрос с помощью [ATTACH](../attach.md). diff --git a/docs/ru/sql-reference/statements/optimize.md b/docs/ru/sql-reference/statements/optimize.md index b70bba2d765..26993183232 100644 --- a/docs/ru/sql-reference/statements/optimize.md +++ b/docs/ru/sql-reference/statements/optimize.md @@ -10,6 +10,7 @@ sidebar_label: OPTIMIZE :::danger "Внимание" `OPTIMIZE` не устраняет причину появления ошибки `Too many parts`. 
+::: **Синтаксис** diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index bc882719a08..d3696f2cf12 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -1867,8 +1867,8 @@ std::set ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti String query; { WriteBufferFromOwnString wb; - wb << "SELECT DISTINCT " << partition_name << " AS partition FROM" - << " " << getQuotedTable(task_shard.table_read_shard) << " ORDER BY partition DESC"; + wb << "SELECT " << partition_name << " AS partition FROM " + << getQuotedTable(task_shard.table_read_shard) << " GROUP BY partition ORDER BY partition DESC"; query = wb.str(); } diff --git a/src/Analyzer/FunctionNode.cpp b/src/Analyzer/FunctionNode.cpp index 718dcf4bb58..fe170c8482e 100644 --- a/src/Analyzer/FunctionNode.cpp +++ b/src/Analyzer/FunctionNode.cpp @@ -2,18 +2,21 @@ #include #include -#include -#include #include #include +#include +#include + #include #include #include +#include +#include #include namespace DB @@ -44,17 +47,29 @@ const DataTypes & FunctionNode::getArgumentTypes() const ColumnsWithTypeAndName FunctionNode::getArgumentColumns() const { const auto & arguments = getArguments().getNodes(); + size_t arguments_size = arguments.size(); + ColumnsWithTypeAndName argument_columns; argument_columns.reserve(arguments.size()); - for (const auto & arg : arguments) + for (size_t i = 0; i < arguments_size; ++i) { - ColumnWithTypeAndName argument; - argument.type = arg->getResultType(); - if (auto * constant = arg->as()) - argument.column = argument.type->createColumnConst(1, constant->getValue()); - argument_columns.push_back(std::move(argument)); + const auto & argument = arguments[i]; + + ColumnWithTypeAndName argument_column; + + if (isNameOfInFunction(function_name) && i == 1) + argument_column.type = std::make_shared(); + else + argument_column.type = argument->getResultType(); + + auto * constant = argument->as(); + if (constant && !isNotCreatable(argument_column.type)) + argument_column.column = argument_column.type->createColumnConst(1, constant->getValue()); + + argument_columns.push_back(std::move(argument_column)); } + return argument_columns; } diff --git a/src/Analyzer/InDepthQueryTreeVisitor.h b/src/Analyzer/InDepthQueryTreeVisitor.h index af69fc55589..1cc48fb1e53 100644 --- a/src/Analyzer/InDepthQueryTreeVisitor.h +++ b/src/Analyzer/InDepthQueryTreeVisitor.h @@ -99,8 +99,9 @@ class InDepthQueryTreeVisitorWithContext public: using VisitQueryTreeNodeType = std::conditional_t; - explicit InDepthQueryTreeVisitorWithContext(ContextPtr context) + explicit InDepthQueryTreeVisitorWithContext(ContextPtr context, size_t initial_subquery_depth = 0) : current_context(std::move(context)) + , subquery_depth(initial_subquery_depth) {} /// Return true if visitor should traverse tree top to bottom, false otherwise @@ -125,11 +126,17 @@ public: return current_context->getSettingsRef(); } + size_t getSubqueryDepth() const + { + return subquery_depth; + } + void visit(VisitQueryTreeNodeType & query_tree_node) { auto current_scope_context_ptr = current_context; SCOPE_EXIT( current_context = std::move(current_scope_context_ptr); + --subquery_depth; ); if (auto * query_node = query_tree_node->template as()) @@ -137,6 +144,8 @@ public: else if (auto * union_node = query_tree_node->template as()) current_context = union_node->getContext(); + ++subquery_depth; + bool traverse_top_to_bottom = getDerived().shouldTraverseTopToBottom(); if (!traverse_top_to_bottom) 
visitChildren(query_tree_node); @@ -145,7 +154,12 @@ public: if (traverse_top_to_bottom) visitChildren(query_tree_node); + + getDerived().leaveImpl(query_tree_node); } + + void leaveImpl(VisitQueryTreeNodeType & node [[maybe_unused]]) + {} private: Derived & getDerived() { @@ -172,6 +186,7 @@ private: } ContextPtr current_context; + size_t subquery_depth = 0; }; template diff --git a/src/Analyzer/JoinNode.h b/src/Analyzer/JoinNode.h index 0d856985794..f58fe3f1af5 100644 --- a/src/Analyzer/JoinNode.h +++ b/src/Analyzer/JoinNode.h @@ -106,6 +106,12 @@ public: return locality; } + /// Set join locality + void setLocality(JoinLocality locality_value) + { + locality = locality_value; + } + /// Get join strictness JoinStrictness getStrictness() const { diff --git a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp index fa5fc0e75a8..15326ca1dc8 100644 --- a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp +++ b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp @@ -42,7 +42,7 @@ private: return; const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage(); - bool is_final_supported = storage && storage->supportsFinal() && !storage->isRemote(); + bool is_final_supported = storage && storage->supportsFinal(); if (!is_final_supported) return; diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp similarity index 97% rename from src/Analyzer/Passes/LogicalExpressionOptimizer.cpp rename to src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 73585a4cd23..3d65035f9fd 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizer.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -7,8 +7,6 @@ #include #include -#include - namespace DB { @@ -100,6 +98,9 @@ private: } } + if (and_operands.size() == function_node.getArguments().getNodes().size()) + return; + if (and_operands.size() == 1) { /// AND operator can have UInt8 or bool as its type. @@ -207,6 +208,9 @@ private: or_operands.push_back(std::move(in_function)); } + if (or_operands.size() == function_node.getArguments().getNodes().size()) + return; + if (or_operands.size() == 1) { /// if the result type of operand is the same as the result type of OR diff --git a/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp b/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp index f6c4d2bc15d..c97645219da 100644 --- a/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp +++ b/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp @@ -69,8 +69,7 @@ private: for (auto it = function_arguments.rbegin(); it != function_arguments.rend(); ++it) candidates.push_back({ *it, is_deterministic }); - // Using DFS we traverse function tree and try to find if it uses other keys as function arguments. - // TODO: Also process CONSTANT here. We can simplify GROUP BY x, x + 1 to GROUP BY x. + /// Using DFS we traverse function tree and try to find if it uses other keys as function arguments. 
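/// Illustrative example of what this pass eliminates: in GROUP BY a, b, a + b the key `a + b`
/// depends only on the other keys `a` and `b`, so it can be dropped, leaving GROUP BY a, b.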
while (!candidates.empty()) { auto [candidate, parents_are_only_deterministic] = candidates.back(); @@ -108,6 +107,7 @@ private: return false; } } + return true; } diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 34c03a9ffb6..38575965973 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -193,13 +193,9 @@ namespace ErrorCodes * lookup should not be continued, and exception must be thrown because if lookup continues identifier can be resolved from parent scope. * * TODO: Update exception messages - * TODO: JOIN TREE subquery constant columns * TODO: Table identifiers with optional UUID. * TODO: Lookup functions arrayReduce(sum, [1, 2, 3]); - * TODO: SELECT (compound_expression).*, (compound_expression).COLUMNS are not supported on parser level. - * TODO: SELECT a.b.c.*, a.b.c.COLUMNS. Qualified matcher where identifier size is greater than 2 are not supported on parser level. * TODO: Support function identifier resolve from parent query scope, if lambda in parent scope does not capture any columns. - * TODO: Scalar subqueries cache. */ namespace @@ -701,7 +697,9 @@ struct IdentifierResolveScope } if (auto * union_node = scope_node->as()) + { context = union_node->getContext(); + } else if (auto * query_node = scope_node->as()) { context = query_node->getContext(); @@ -1336,6 +1334,9 @@ private: /// Global resolve expression node to projection names map std::unordered_map resolved_expressions; + /// Global resolve expression node to tree size + std::unordered_map node_to_tree_size; + /// Global scalar subquery to scalar value map std::unordered_map scalar_subquery_to_scalar_value; @@ -1864,7 +1865,10 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden Block scalar_block; - QueryTreeNodePtrWithHash node_with_hash(node); + auto node_without_alias = node->clone(); + node_without_alias->removeAlias(); + + QueryTreeNodePtrWithHash node_with_hash(node_without_alias); auto scalar_value_it = scalar_subquery_to_scalar_value.find(node_with_hash); if (scalar_value_it != scalar_subquery_to_scalar_value.end()) @@ -1954,21 +1958,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden * * Example: SELECT (SELECT 2 AS x, x) */ - NameSet block_column_names; - size_t unique_column_name_counter = 1; - - for (auto & column_with_type : block) - { - if (!block_column_names.contains(column_with_type.name)) - { - block_column_names.insert(column_with_type.name); - continue; - } - - column_with_type.name += '_'; - column_with_type.name += std::to_string(unique_column_name_counter); - ++unique_column_name_counter; - } + makeUniqueColumnNamesInBlock(block); scalar_block.insert({ ColumnTuple::create(block.getColumns()), @@ -2348,7 +2338,13 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveTableIdentifierFromDatabaseCatalog(con storage_id = context->resolveStorageID(storage_id); bool is_temporary_table = storage_id.getDatabaseName() == DatabaseCatalog::TEMPORARY_DATABASE; - auto storage = DatabaseCatalog::instance().tryGetTable(storage_id, context); + StoragePtr storage; + + if (is_temporary_table) + storage = DatabaseCatalog::instance().getTable(storage_id, context); + else + storage = DatabaseCatalog::instance().tryGetTable(storage_id, context); + if (!storage) return {}; @@ -2914,7 +2910,10 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTableExpression(const Id break; IdentifierLookup column_identifier_lookup = 
{qualified_identifier_with_removed_part, IdentifierLookupContext::EXPRESSION}; - if (tryBindIdentifierToAliases(column_identifier_lookup, scope) || + if (tryBindIdentifierToAliases(column_identifier_lookup, scope)) + break; + + if (table_expression_data.should_qualify_columns && tryBindIdentifierToTableExpressions(column_identifier_lookup, table_expression_node, scope)) break; @@ -3018,11 +3017,39 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo resolved_identifier = std::move(result_column_node); } - else if (scope.joins_count == 1 && scope.context->getSettingsRef().single_join_prefer_left_table) + else if (left_resolved_identifier->isEqual(*right_resolved_identifier, IQueryTreeNode::CompareOptions{.compare_aliases = false})) { + const auto & identifier_path_part = identifier_lookup.identifier.front(); + auto * left_resolved_identifier_column = left_resolved_identifier->as(); + auto * right_resolved_identifier_column = right_resolved_identifier->as(); + + if (left_resolved_identifier_column && right_resolved_identifier_column) + { + const auto & left_column_source_alias = left_resolved_identifier_column->getColumnSource()->getAlias(); + const auto & right_column_source_alias = right_resolved_identifier_column->getColumnSource()->getAlias(); + + /** If column from right table was resolved using alias, we prefer column from right table. + * + * Example: SELECT dummy FROM system.one JOIN system.one AS A ON A.dummy = system.one.dummy; + * + * If alias is specified for left table, and alias is not specified for right table and identifier was resolved + * without using left table alias, we prefer column from right table. + * + * Example: SELECT dummy FROM system.one AS A JOIN system.one ON A.dummy = system.one.dummy; + * + * Otherwise we prefer column from left table. + */ + if (identifier_path_part == right_column_source_alias) + return right_resolved_identifier; + else if (!left_column_source_alias.empty() && + right_column_source_alias.empty() && + identifier_path_part != left_column_source_alias) + return right_resolved_identifier; + } + return left_resolved_identifier; } - else if (left_resolved_identifier->isEqual(*right_resolved_identifier, IQueryTreeNode::CompareOptions{.compare_aliases = false})) + else if (scope.joins_count == 1 && scope.context->getSettingsRef().single_join_prefer_left_table) { return left_resolved_identifier; } @@ -4466,6 +4493,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi bool is_special_function_dict_get = false; bool is_special_function_join_get = false; bool is_special_function_exists = false; + bool is_special_function_if = false; if (!lambda_expression_untyped) { @@ -4473,6 +4501,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi is_special_function_dict_get = functionIsDictGet(function_name); is_special_function_join_get = functionIsJoinGet(function_name); is_special_function_exists = function_name == "exists"; + is_special_function_if = function_name == "if"; auto function_name_lowercase = Poco::toLower(function_name); @@ -4571,6 +4600,60 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi is_special_function_in = true; } + if (is_special_function_if && !function_node_ptr->getArguments().getNodes().empty()) + { + /** Handle special case with constant If function, even if some of the arguments are invalid. 
+ * + * SELECT if(hasColumnInTable('system', 'numbers', 'not_existing_column'), not_existing_column, 5) FROM system.numbers; + */ + auto & if_function_arguments = function_node_ptr->getArguments().getNodes(); + auto if_function_condition = if_function_arguments[0]; + resolveExpressionNode(if_function_condition, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + auto constant_condition = tryExtractConstantFromConditionNode(if_function_condition); + + if (constant_condition.has_value() && if_function_arguments.size() == 3) + { + QueryTreeNodePtr constant_if_result_node; + QueryTreeNodePtr possibly_invalid_argument_node; + + if (*constant_condition) + { + possibly_invalid_argument_node = if_function_arguments[2]; + constant_if_result_node = if_function_arguments[1]; + } + else + { + possibly_invalid_argument_node = if_function_arguments[1]; + constant_if_result_node = if_function_arguments[2]; + } + + bool apply_constant_if_optimization = false; + + try + { + resolveExpressionNode(possibly_invalid_argument_node, + scope, + false /*allow_lambda_expression*/, + false /*allow_table_expression*/); + } + catch (...) + { + apply_constant_if_optimization = true; + } + + if (apply_constant_if_optimization) + { + auto result_projection_names = resolveExpressionNode(constant_if_result_node, + scope, + false /*allow_lambda_expression*/, + false /*allow_table_expression*/); + node = std::move(constant_if_result_node); + return result_projection_names; + } + } + } + /// Resolve function arguments bool allow_table_expressions = is_special_function_in; @@ -5059,7 +5142,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi /// Do not constant fold get scalar functions bool disable_constant_folding = function_name == "__getScalar" || function_name == "shardNum" || - function_name == "shardCount"; + function_name == "shardCount" || function_name == "hostName"; /** If function is suitable for constant folding try to convert it to constant. * Example: SELECT plus(1, 1); @@ -5085,7 +5168,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi /** Do not perform constant folding if there are aggregate or arrayJoin functions inside function. * Example: SELECT toTypeName(sum(number)) FROM numbers(10); */ - if (column && isColumnConst(*column) && (!hasAggregateFunctionNodes(node) && !hasFunctionNode(node, "arrayJoin"))) + if (column && isColumnConst(*column) && !typeid_cast(column.get())->getDataColumn().isDummy() && + (!hasAggregateFunctionNodes(node) && !hasFunctionNode(node, "arrayJoin"))) { /// Replace function node with result constant node Field column_constant_value; @@ -5433,9 +5517,9 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id } } - if (node - && scope.nullable_group_by_keys.contains(node) - && !scope.expressions_in_resolve_process_stack.hasAggregateFunction()) + validateTreeSize(node, scope.context->getSettingsRef().max_expanded_ast_elements, node_to_tree_size); + + if (scope.nullable_group_by_keys.contains(node) && !scope.expressions_in_resolve_process_stack.hasAggregateFunction()) { node = node->clone(); node->convertToNullable(); @@ -6592,6 +6676,17 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier /// Resolve query node sections. 
+ NamesAndTypes projection_columns; + + if (!scope.group_by_use_nulls) + { + projection_columns = resolveProjectionExpressionNodeList(query_node_typed.getProjectionNode(), scope); + if (query_node_typed.getProjection().getNodes().empty()) + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED, + "Empty list of columns in projection. In scope {}", + scope.scope_node->formatASTForErrorMessage()); + } + if (query_node_typed.hasWith()) resolveExpressionNodeList(query_node_typed.getWithNode(), scope, true /*allow_lambda_expression*/, false /*allow_table_expression*/); @@ -6686,11 +6781,14 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier convertLimitOffsetExpression(query_node_typed.getOffset(), "OFFSET", scope); } - auto projection_columns = resolveProjectionExpressionNodeList(query_node_typed.getProjectionNode(), scope); - if (query_node_typed.getProjection().getNodes().empty()) - throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED, - "Empty list of columns in projection. In scope {}", - scope.scope_node->formatASTForErrorMessage()); + if (scope.group_by_use_nulls) + { + projection_columns = resolveProjectionExpressionNodeList(query_node_typed.getProjectionNode(), scope); + if (query_node_typed.getProjection().getNodes().empty()) + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED, + "Empty list of columns in projection. In scope {}", + scope.scope_node->formatASTForErrorMessage()); + } /** Resolve nodes with duplicate aliases. * Table expressions cannot have duplicate aliases. @@ -6757,6 +6855,15 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier validateAggregates(query_node, { .group_by_use_nulls = scope.group_by_use_nulls }); + for (const auto & column : projection_columns) + { + if (isNotCreatable(column.type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Invalid projection column with type {}. In scope {}", + column.type->getName(), + scope.scope_node->formatASTForErrorMessage()); + } + /** WITH section can be safely removed, because WITH section only can provide aliases to query expressions * and CTE for other sections to use. * diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index c7b9f9aae08..7dd988619ac 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -355,21 +355,67 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q if (select_limit_by) current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by, current_context); - /// Combine limit expression with limit setting + /// Combine limit expression with limit and offset settings into final limit expression + /// The sequence of application is the following - offset expression, limit expression, offset setting, limit setting. + /// Since offset setting is applied after limit expression, but we want to transfer settings into expression + /// we must decrease limit expression by offset setting and then add offset setting to offset expression. 
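/// Worked example (illustrative values): a query with LIMIT 10 run under SETTINGS limit = 5, offset = 3
/// takes the branch where offset < select_limit and limit > 0, so the final limit expression becomes
/// least(10 - 3, 5) = 5, and 3 is added to the query's own OFFSET expression.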
+ /// select_limit - limit expression + /// limit - limit setting + /// offset - offset setting + /// + /// if select_limit + /// -- if offset >= select_limit (expr 0) + /// then (0) (0 rows) + /// -- else if limit > 0 (expr 1) + /// then min(select_limit - offset, limit) (expr 2) + /// -- else + /// then (select_limit - offset) (expr 3) + /// else if limit > 0 + /// then limit + /// + /// offset = offset + of_expr auto select_limit = select_query_typed.limitLength(); - if (select_limit && limit) + if (select_limit) { - auto function_node = std::make_shared("least"); - function_node->getArguments().getNodes().push_back(buildExpression(select_limit, current_context)); - function_node->getArguments().getNodes().push_back(std::make_shared(limit)); - current_query_tree->getLimit() = std::move(function_node); - } - else if (limit) - current_query_tree->getLimit() = std::make_shared(limit); - else if (select_limit) - current_query_tree->getLimit() = buildExpression(select_limit, current_context); + /// Shortcut + if (offset == 0 && limit == 0) + { + current_query_tree->getLimit() = buildExpression(select_limit, current_context); + } + else + { + /// expr 3 + auto expr_3 = std::make_shared("minus"); + expr_3->getArguments().getNodes().push_back(buildExpression(select_limit, current_context)); + expr_3->getArguments().getNodes().push_back(std::make_shared(offset)); - /// Combine offset expression with offset setting + /// expr 2 + auto expr_2 = std::make_shared("least"); + expr_2->getArguments().getNodes().push_back(expr_3->clone()); + expr_2->getArguments().getNodes().push_back(std::make_shared(limit)); + + /// expr 0 + auto expr_0 = std::make_shared("greaterOrEquals"); + expr_0->getArguments().getNodes().push_back(std::make_shared(offset)); + expr_0->getArguments().getNodes().push_back(buildExpression(select_limit, current_context)); + + /// expr 1 + auto expr_1 = std::make_shared(limit > 0); + + auto function_node = std::make_shared("multiIf"); + function_node->getArguments().getNodes().push_back(expr_0); + function_node->getArguments().getNodes().push_back(std::make_shared(0)); + function_node->getArguments().getNodes().push_back(expr_1); + function_node->getArguments().getNodes().push_back(expr_2); + function_node->getArguments().getNodes().push_back(expr_3); + + current_query_tree->getLimit() = std::move(function_node); + } + } + else if (limit > 0) + current_query_tree->getLimit() = std::make_shared(limit); + + /// Combine offset expression with offset setting into final offset expression auto select_offset = select_query_typed.limitOffset(); if (select_offset && offset) { diff --git a/src/Analyzer/TableNode.cpp b/src/Analyzer/TableNode.cpp index a746986be04..f315d372bc9 100644 --- a/src/Analyzer/TableNode.cpp +++ b/src/Analyzer/TableNode.cpp @@ -61,12 +61,17 @@ bool TableNode::isEqualImpl(const IQueryTreeNode & rhs) const void TableNode::updateTreeHashImpl(HashState & state) const { - auto full_name = storage_id.getFullNameNotQuoted(); - state.update(full_name.size()); - state.update(full_name); - - state.update(temporary_table_name.size()); - state.update(temporary_table_name); + if (!temporary_table_name.empty()) + { + state.update(temporary_table_name.size()); + state.update(temporary_table_name); + } + else + { + auto full_name = storage_id.getFullNameNotQuoted(); + state.update(full_name.size()); + state.update(full_name); + } if (table_expression_modifiers) table_expression_modifiers->updateTreeHash(state); diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 
c5a5c042cbc..eb7aceef1e8 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } bool isNodePartOfTree(const IQueryTreeNode * node, const IQueryTreeNode * root) @@ -79,6 +81,75 @@ bool isNameOfInFunction(const std::string & function_name) return is_special_function_in; } +bool isNameOfLocalInFunction(const std::string & function_name) +{ + bool is_special_function_in = function_name == "in" || + function_name == "notIn" || + function_name == "nullIn" || + function_name == "notNullIn" || + function_name == "inIgnoreSet" || + function_name == "notInIgnoreSet" || + function_name == "nullInIgnoreSet" || + function_name == "notNullInIgnoreSet"; + + return is_special_function_in; +} + +bool isNameOfGlobalInFunction(const std::string & function_name) +{ + bool is_special_function_in = function_name == "globalIn" || + function_name == "globalNotIn" || + function_name == "globalNullIn" || + function_name == "globalNotNullIn" || + function_name == "globalInIgnoreSet" || + function_name == "globalNotInIgnoreSet" || + function_name == "globalNullInIgnoreSet" || + function_name == "globalNotNullInIgnoreSet"; + + return is_special_function_in; +} + +std::string getGlobalInFunctionNameForLocalInFunctionName(const std::string & function_name) +{ + if (function_name == "in") + return "globalIn"; + else if (function_name == "notIn") + return "globalNotIn"; + else if (function_name == "nullIn") + return "globalNullIn"; + else if (function_name == "notNullIn") + return "globalNotNullIn"; + else if (function_name == "inIgnoreSet") + return "globalInIgnoreSet"; + else if (function_name == "notInIgnoreSet") + return "globalNotInIgnoreSet"; + else if (function_name == "nullInIgnoreSet") + return "globalNullInIgnoreSet"; + else if (function_name == "notNullInIgnoreSet") + return "globalNotNullInIgnoreSet"; + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid local IN function name {}", function_name); +} + +void makeUniqueColumnNamesInBlock(Block & block) +{ + NameSet block_column_names; + size_t unique_column_name_counter = 1; + + for (auto & column_with_type : block) + { + if (!block_column_names.contains(column_with_type.name)) + { + block_column_names.insert(column_with_type.name); + continue; + } + + column_with_type.name += '_'; + column_with_type.name += std::to_string(unique_column_name_counter); + ++unique_column_name_counter; + } +} + QueryTreeNodePtr buildCastFunction(const QueryTreeNodePtr & expression, const DataTypePtr & type, const ContextPtr & context, @@ -102,6 +173,27 @@ QueryTreeNodePtr buildCastFunction(const QueryTreeNodePtr & expression, return cast_function_node; } +std::optional tryExtractConstantFromConditionNode(const QueryTreeNodePtr & condition_node) +{ + const auto * constant_node = condition_node->as(); + if (!constant_node) + return {}; + + const auto & value = constant_node->getValue(); + auto constant_type = constant_node->getResultType(); + constant_type = removeNullable(removeLowCardinality(constant_type)); + + auto which_constant_type = WhichDataType(constant_type); + if (!which_constant_type.isUInt8() && !which_constant_type.isNothing()) + return {}; + + if (value.isNull()) + return false; + + UInt8 predicate_value = value.safeGet(); + return predicate_value > 0; +} + static ASTPtr convertIntoTableExpressionAST(const QueryTreeNodePtr & table_expression_node) { ASTPtr 
table_expression_node_ast; diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index 3e2d95c6012..5802c86c462 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -13,6 +13,18 @@ bool isNodePartOfTree(const IQueryTreeNode * node, const IQueryTreeNode * root); /// Returns true if function name is name of IN function or its variations, false otherwise bool isNameOfInFunction(const std::string & function_name); +/// Returns true if function name is name of local IN function or its variations, false otherwise +bool isNameOfLocalInFunction(const std::string & function_name); + +/// Returns true if function name is name of global IN function or its variations, false otherwise +bool isNameOfGlobalInFunction(const std::string & function_name); + +/// Returns global IN function name for local IN function name +std::string getGlobalInFunctionNameForLocalInFunctionName(const std::string & function_name); + +/// Add unique suffix to names of duplicate columns in block +void makeUniqueColumnNamesInBlock(Block & block); + /** Build cast function that cast expression into type. * If resolve = true, then result cast function is resolved during build, otherwise * result cast function is not resolved during build. @@ -22,6 +34,9 @@ QueryTreeNodePtr buildCastFunction(const QueryTreeNodePtr & expression, const ContextPtr & context, bool resolve = true); +/// Try extract boolean constant from condition node +std::optional tryExtractConstantFromConditionNode(const QueryTreeNodePtr & condition_node); + /** Add table expression in tables in select query children. * If table expression node is not of identifier node, table node, query node, table function node, join node or array join node type throws logical error exception. */ diff --git a/src/Analyzer/ValidationUtils.cpp b/src/Analyzer/ValidationUtils.cpp index 8ccecc9769c..d70ed1170fc 100644 --- a/src/Analyzer/ValidationUtils.cpp +++ b/src/Analyzer/ValidationUtils.cpp @@ -16,6 +16,7 @@ namespace ErrorCodes { extern const int NOT_AN_AGGREGATE; extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; } class ValidateGroupByColumnsVisitor : public ConstInDepthQueryTreeVisitor @@ -283,4 +284,52 @@ void assertNoFunctionNodes(const QueryTreeNodePtr & node, visitor.visit(node); } +void validateTreeSize(const QueryTreeNodePtr & node, + size_t max_size, + std::unordered_map & node_to_tree_size) +{ + size_t tree_size = 0; + std::vector> nodes_to_process; + nodes_to_process.emplace_back(node, false); + + while (!nodes_to_process.empty()) + { + const auto [node_to_process, processed_children] = nodes_to_process.back(); + nodes_to_process.pop_back(); + + if (processed_children) + { + ++tree_size; + node_to_tree_size.emplace(node_to_process, tree_size); + continue; + } + + auto node_to_size_it = node_to_tree_size.find(node_to_process); + if (node_to_size_it != node_to_tree_size.end()) + { + tree_size += node_to_size_it->second; + continue; + } + + nodes_to_process.emplace_back(node_to_process, true); + + for (const auto & node_to_process_child : node_to_process->getChildren()) + { + if (!node_to_process_child) + continue; + + nodes_to_process.emplace_back(node_to_process_child, false); + } + + auto * constant_node = node_to_process->as(); + if (constant_node && constant_node->hasSourceExpression()) + nodes_to_process.emplace_back(constant_node->getSourceExpression(), false); + } + + if (tree_size > max_size) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Query tree is too big. 
Maximum: {}", + max_size); +} + } diff --git a/src/Analyzer/ValidationUtils.h b/src/Analyzer/ValidationUtils.h index b8ba6b8cc10..c15a3531c8d 100644 --- a/src/Analyzer/ValidationUtils.h +++ b/src/Analyzer/ValidationUtils.h @@ -7,7 +7,7 @@ namespace DB struct ValidationParams { - bool group_by_use_nulls; + bool group_by_use_nulls = false; }; /** Validate aggregates in query node. @@ -31,4 +31,11 @@ void assertNoFunctionNodes(const QueryTreeNodePtr & node, std::string_view exception_function_name, std::string_view exception_place_message); +/** Validate tree size. If size of tree is greater than max size throws exception. + * Additionally for each node in tree, update node to tree size map. + */ +void validateTreeSize(const QueryTreeNodePtr & node, + size_t max_size, + std::unordered_map & node_to_tree_size); + } diff --git a/src/Analyzer/WindowNode.cpp b/src/Analyzer/WindowNode.cpp index 3e8537302e5..d516f7a58b8 100644 --- a/src/Analyzer/WindowNode.cpp +++ b/src/Analyzer/WindowNode.cpp @@ -113,11 +113,17 @@ ASTPtr WindowNode::toASTImpl() const window_definition->parent_window_name = parent_window_name; - window_definition->children.push_back(getPartitionByNode()->toAST()); - window_definition->partition_by = window_definition->children.back(); + if (hasPartitionBy()) + { + window_definition->children.push_back(getPartitionByNode()->toAST()); + window_definition->partition_by = window_definition->children.back(); + } - window_definition->children.push_back(getOrderByNode()->toAST()); - window_definition->order_by = window_definition->children.back(); + if (hasOrderBy()) + { + window_definition->children.push_back(getOrderByNode()->toAST()); + window_definition->order_by = window_definition->children.back(); + } window_definition->frame_is_default = window_frame.is_default; window_definition->frame_type = window_frame.type; diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 53eb5080130..876b1d1906d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1834,7 +1834,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) { /// disable logs if expects errors TestHint test_hint(all_queries_text); - if (test_hint.clientError() || test_hint.serverError()) + if (test_hint.hasClientErrors() || test_hint.hasServerErrors()) processTextAsSingleQuery("SET send_logs_level = 'fatal'"); } @@ -1876,17 +1876,17 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) // the query ends because we failed to parse it, so we consume // the entire line. 
TestHint hint(String(this_query_begin, this_query_end - this_query_begin)); - if (hint.serverError()) + if (hint.hasServerErrors()) { // Syntax errors are considered as client errors - current_exception->addMessage("\nExpected server error '{}'.", hint.serverError()); + current_exception->addMessage("\nExpected server error: {}.", hint.serverErrors()); current_exception->rethrow(); } - if (hint.clientError() != current_exception->code()) + if (!hint.hasExpectedClientError(current_exception->code())) { - if (hint.clientError()) - current_exception->addMessage("\nExpected client error: " + std::to_string(hint.clientError())); + if (hint.hasClientErrors()) + current_exception->addMessage("\nExpected client error: {}.", hint.clientErrors()); current_exception->rethrow(); } @@ -1935,37 +1935,37 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) bool error_matches_hint = true; if (have_error) { - if (test_hint.serverError()) + if (test_hint.hasServerErrors()) { if (!server_exception) { error_matches_hint = false; fmt::print(stderr, "Expected server error code '{}' but got no server error (query: {}).\n", - test_hint.serverError(), full_query); + test_hint.serverErrors(), full_query); } - else if (server_exception->code() != test_hint.serverError()) + else if (!test_hint.hasExpectedServerError(server_exception->code())) { error_matches_hint = false; fmt::print(stderr, "Expected server error code: {} but got: {} (query: {}).\n", - test_hint.serverError(), server_exception->code(), full_query); + test_hint.serverErrors(), server_exception->code(), full_query); } } - if (test_hint.clientError()) + if (test_hint.hasClientErrors()) { if (!client_exception) { error_matches_hint = false; fmt::print(stderr, "Expected client error code '{}' but got no client error (query: {}).\n", - test_hint.clientError(), full_query); + test_hint.clientErrors(), full_query); } - else if (client_exception->code() != test_hint.clientError()) + else if (!test_hint.hasExpectedClientError(client_exception->code())) { error_matches_hint = false; fmt::print(stderr, "Expected client error code '{}' but got '{}' (query: {}).\n", - test_hint.clientError(), client_exception->code(), full_query); + test_hint.clientErrors(), client_exception->code(), full_query); } } - if (!test_hint.clientError() && !test_hint.serverError()) + if (!test_hint.hasClientErrors() && !test_hint.hasServerErrors()) { // No error was expected but it still occurred. 
This is the // default case without test hint, doesn't need additional @@ -1975,19 +1975,19 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) } else { - if (test_hint.clientError()) + if (test_hint.hasClientErrors()) { error_matches_hint = false; fmt::print(stderr, "The query succeeded but the client error '{}' was expected (query: {}).\n", - test_hint.clientError(), full_query); + test_hint.clientErrors(), full_query); } - if (test_hint.serverError()) + if (test_hint.hasServerErrors()) { error_matches_hint = false; fmt::print(stderr, "The query succeeded but the server error '{}' was expected (query: {}).\n", - test_hint.serverError(), full_query); + test_hint.serverErrors(), full_query); } } diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index eea007a8608..4f7cf893328 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -506,7 +506,7 @@ void Connection::sendQuery( bool with_pending_data, std::function) { - OpenTelemetry::SpanHolder span("Connection::sendQuery()"); + OpenTelemetry::SpanHolder span("Connection::sendQuery()", OpenTelemetry::CLIENT); span.addAttribute("clickhouse.query_id", query_id_); span.addAttribute("clickhouse.query", query); span.addAttribute("target", [this] () { return this->getHost() + ":" + std::to_string(this->getPort()); }); diff --git a/src/Client/TestHint.cpp b/src/Client/TestHint.cpp index f6d1e5d73c3..b64882577ee 100644 --- a/src/Client/TestHint.cpp +++ b/src/Client/TestHint.cpp @@ -1,32 +1,15 @@ -#include "TestHint.h" +#include +#include + +#include -#include -#include -#include -#include #include +#include +#include -namespace +namespace DB::ErrorCodes { - -/// Parse error as number or as a string (name of the error code const) -int parseErrorCode(DB::ReadBufferFromString & in) -{ - int code = -1; - String code_name; - - auto * pos = in.position(); - tryReadText(code, in); - if (pos != in.position()) - { - return code; - } - - /// Try parse as string - readStringUntilWhitespace(code_name, in); - return DB::ErrorCodes::getErrorCodeByName(code_name); -} - + extern const int CANNOT_PARSE_TEXT; } namespace DB @@ -60,8 +43,8 @@ TestHint::TestHint(const String & query_) size_t pos_end = comment.find('}', pos_start); if (pos_end != String::npos) { - String hint(comment.begin() + pos_start + 1, comment.begin() + pos_end); - parse(hint, is_leading_hint); + Lexer comment_lexer(comment.c_str() + pos_start + 1, comment.c_str() + pos_end, 0); + parse(comment_lexer, is_leading_hint); } } } @@ -69,33 +52,86 @@ TestHint::TestHint(const String & query_) } } -void TestHint::parse(const String & hint, bool is_leading_hint) +bool TestHint::hasExpectedClientError(int error) { - ReadBufferFromString in(hint); - String item; + return std::find(client_errors.begin(), client_errors.end(), error) != client_errors.end(); +} - while (!in.eof()) +bool TestHint::hasExpectedServerError(int error) +{ + return std::find(server_errors.begin(), server_errors.end(), error) != server_errors.end(); +} + +void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint) +{ + std::unordered_set commands{"echo", "echoOn", "echoOff"}; + + std::unordered_set command_errors{ + "serverError", + "clientError", + }; + + for (Token token = comment_lexer.nextToken(); !token.isEnd(); token = comment_lexer.nextToken()) { - readStringUntilWhitespace(item, in); - if (in.eof()) - break; - - skipWhitespaceIfAny(in); - - if (!is_leading_hint) + String item = String(token.begin, token.end); + if (token.type == TokenType::BareWord && 
commands.contains(item)) { - if (item == "serverError") - server_error = parseErrorCode(in); - else if (item == "clientError") - client_error = parseErrorCode(in); + if (item == "echo") + echo.emplace(true); + if (item == "echoOn") + echo.emplace(true); + if (item == "echoOff") + echo.emplace(false); } + else if (!is_leading_hint && token.type == TokenType::BareWord && command_errors.contains(item)) + { + /// Everything after this must be a list of errors separated by comma + ErrorVector error_codes; + while (!token.isEnd()) + { + token = comment_lexer.nextToken(); + if (token.type == TokenType::Whitespace) + continue; + if (token.type == TokenType::Number) + { + int code; + auto [p, ec] = std::from_chars(token.begin, token.end, code); + if (p == token.begin) + throw DB::Exception( + DB::ErrorCodes::CANNOT_PARSE_TEXT, + "Could not parse integer number for errorcode: {}", + std::string_view(token.begin, token.end)); + error_codes.push_back(code); + } + else if (token.type == TokenType::BareWord) + { + int code = DB::ErrorCodes::getErrorCodeByName(std::string_view(token.begin, token.end)); + error_codes.push_back(code); + } + else + throw DB::Exception( + DB::ErrorCodes::CANNOT_PARSE_TEXT, + "Could not parse error code in {}: {}", + getTokenName(token.type), + std::string_view(token.begin, token.end)); + do + { + token = comment_lexer.nextToken(); + } while (!token.isEnd() && token.type == TokenType::Whitespace); - if (item == "echo") - echo.emplace(true); - if (item == "echoOn") - echo.emplace(true); - if (item == "echoOff") - echo.emplace(false); + if (!token.isEnd() && token.type != TokenType::Comma) + throw DB::Exception( + DB::ErrorCodes::CANNOT_PARSE_TEXT, + "Could not parse error code. Expected ','. Got '{}'", + std::string_view(token.begin, token.end)); + } + + if (item == "serverError") + server_errors = error_codes; + else + client_errors = error_codes; + break; + } } } diff --git a/src/Client/TestHint.h b/src/Client/TestHint.h index 7fa4e86c025..63f16b1dd97 100644 --- a/src/Client/TestHint.h +++ b/src/Client/TestHint.h @@ -1,21 +1,30 @@ #pragma once #include +#include + +#include + #include namespace DB { +class Lexer; + /// Checks expected server and client error codes. /// /// The following comment hints are supported: /// /// - "-- { serverError 60 }" -- in case of you are expecting server error. +/// - "-- { serverError 16, 36 }" -- in case of you are expecting one of the 2 errors. /// /// - "-- { clientError 20 }" -- in case of you are expecting client error. +/// - "-- { clientError 20, 60, 92 }" -- It's expected that the client will return one of the 3 errors. /// /// - "-- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }" -- by error name. +/// - "-- { serverError NO_SUCH_COLUMN_IN_TABLE, BAD_ARGUMENTS }" -- by error name. /// /// - "-- { clientError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }" -- by error name. 
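/// Illustrative usage inside a test .sql file (the codes in the second statement are examples):
///   SELECT throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }
///   SELECT * FROM missing_table; -- { serverError 60, 36 }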
/// @@ -43,29 +52,73 @@ namespace DB class TestHint { public: + using ErrorVector = std::vector; TestHint(const String & query_); - int serverError() const { return server_error; } - int clientError() const { return client_error; } + const auto & serverErrors() const { return server_errors; } + const auto & clientErrors() const { return client_errors; } std::optional echoQueries() const { return echo; } + bool hasClientErrors() { return !client_errors.empty(); } + bool hasServerErrors() { return !server_errors.empty(); } + + bool hasExpectedClientError(int error); + bool hasExpectedServerError(int error); + private: const String & query; - int server_error = 0; - int client_error = 0; + ErrorVector server_errors{}; + ErrorVector client_errors{}; std::optional echo; - void parse(const String & hint, bool is_leading_hint); + void parse(Lexer & comment_lexer, bool is_leading_hint); bool allErrorsExpected(int actual_server_error, int actual_client_error) const { - return (server_error || client_error) && (server_error == actual_server_error) && (client_error == actual_client_error); + if (actual_server_error && std::find(server_errors.begin(), server_errors.end(), actual_server_error) == server_errors.end()) + return false; + if (!actual_server_error && server_errors.size()) + return false; + + if (actual_client_error && std::find(client_errors.begin(), client_errors.end(), actual_client_error) == client_errors.end()) + return false; + if (!actual_client_error && client_errors.size()) + return false; + + return true; } bool lostExpectedError(int actual_server_error, int actual_client_error) const { - return (server_error && !actual_server_error) || (client_error && !actual_client_error); + return (server_errors.size() && !actual_server_error) || (client_errors.size() && !actual_client_error); } }; } + +template <> +struct fmt::formatter +{ + static constexpr auto parse(format_parse_context & ctx) + { + const auto * it = ctx.begin(); + const auto * end = ctx.end(); + + /// Only support {}. 
+ if (it != end && *it != '}') + throw format_error("Invalid format"); + + return it; + } + + template + auto format(const DB::TestHint::ErrorVector & ErrorVector, FormatContext & ctx) + { + if (ErrorVector.empty()) + return format_to(ctx.out(), "{}", 0); + else if (ErrorVector.size() == 1) + return format_to(ctx.out(), "{}", ErrorVector[0]); + else + return format_to(ctx.out(), "[{}]", fmt::join(ErrorVector, ", ")); + } +}; diff --git a/src/Common/CurrentThread.cpp b/src/Common/CurrentThread.cpp index b54cf3b9371..188e78fe69b 100644 --- a/src/Common/CurrentThread.cpp +++ b/src/Common/CurrentThread.cpp @@ -110,23 +110,4 @@ ThreadGroupStatusPtr CurrentThread::getGroup() return current_thread->getThreadGroup(); } -MemoryTracker * CurrentThread::getUserMemoryTracker() -{ - if (unlikely(!current_thread)) - return nullptr; - - auto * tracker = current_thread->memory_tracker.getParent(); - while (tracker && tracker->level != VariableContext::User) - tracker = tracker->getParent(); - - return tracker; -} - -void CurrentThread::flushUntrackedMemory() -{ - if (unlikely(!current_thread)) - return; - current_thread->flushUntrackedMemory(); -} - } diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h index ffc00c77504..f4975e800ca 100644 --- a/src/Common/CurrentThread.h +++ b/src/Common/CurrentThread.h @@ -40,12 +40,6 @@ public: /// Group to which belongs current thread static ThreadGroupStatusPtr getGroup(); - /// MemoryTracker for user that owns current thread if any - static MemoryTracker * getUserMemoryTracker(); - - /// Adjust counters in MemoryTracker hierarchy if untracked_memory is not 0. - static void flushUntrackedMemory(); - /// A logs queue used by TCPHandler to pass logs to a client static void attachInternalTextLogsQueue(const std::shared_ptr & logs_queue, LogsLevel client_logs_level); diff --git a/src/Common/OpenTelemetryTraceContext.cpp b/src/Common/OpenTelemetryTraceContext.cpp index df4ee6a34bf..014034c6ba8 100644 --- a/src/Common/OpenTelemetryTraceContext.cpp +++ b/src/Common/OpenTelemetryTraceContext.cpp @@ -92,7 +92,7 @@ bool Span::addAttributeImpl(std::string_view name, std::string_view value) noexc return true; } -SpanHolder::SpanHolder(std::string_view _operation_name) +SpanHolder::SpanHolder(std::string_view _operation_name, SpanKind _kind) { if (!current_thread_trace_context.isTraceEnabled()) { @@ -106,6 +106,7 @@ SpanHolder::SpanHolder(std::string_view _operation_name) this->parent_span_id = current_thread_trace_context.span_id; this->span_id = thread_local_rng(); // create a new id for this span this->operation_name = _operation_name; + this->kind = _kind; this->start_time_us = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); diff --git a/src/Common/OpenTelemetryTraceContext.h b/src/Common/OpenTelemetryTraceContext.h index 64bce9e98db..bfd091e2902 100644 --- a/src/Common/OpenTelemetryTraceContext.h +++ b/src/Common/OpenTelemetryTraceContext.h @@ -13,6 +13,29 @@ class ReadBuffer; namespace OpenTelemetry { +/// See https://opentelemetry.io/docs/reference/specification/trace/api/#spankind +enum SpanKind +{ + /// Default value. Indicates that the span represents an internal operation within an application, + /// as opposed to an operations with remote parents or children. + INTERNAL = 0, + + /// Indicates that the span covers server-side handling of a synchronous RPC or other remote request. + /// This span is often the child of a remote CLIENT span that was expected to wait for a response. 
+ SERVER = 1, + + /// Indicates that the span describes a request to some remote service. + /// This span is usually the parent of a remote SERVER span and does not end until the response is received. + CLIENT = 2, + + /// Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts. + /// In messaging scenarios with batching, tracing individual messages requires a new PRODUCER span per message to be created. + PRODUCER = 3, + + /// Indicates that the span describes a child of an asynchronous PRODUCER request + CONSUMER = 4 +}; + struct Span { UUID trace_id{}; @@ -21,6 +44,7 @@ struct Span String operation_name; UInt64 start_time_us = 0; UInt64 finish_time_us = 0; + SpanKind kind = INTERNAL; Map attributes; /// Following methods are declared as noexcept to make sure they're exception safe. @@ -155,7 +179,7 @@ using TracingContextHolderPtr = std::unique_ptr; /// Once it's created or destructed, it automatically maitains the tracing context on the thread that it lives. struct SpanHolder : public Span { - SpanHolder(std::string_view); + SpanHolder(std::string_view, SpanKind _kind = INTERNAL); ~SpanHolder(); /// Finish a span explicitly if needed. diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 899310dc591..ddedae4fa0f 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -211,9 +211,14 @@ public: void flush() { auto * file_buffer = tryGetFileBuffer(); - /// Fsync file system if needed - if (file_buffer && log_file_settings.force_sync) - file_buffer->sync(); + if (file_buffer) + { + /// Fsync file system if needed + if (log_file_settings.force_sync) + file_buffer->sync(); + else + file_buffer->next(); + } } uint64_t getStartIndex() const diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3937f73218f..881cbe42a02 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -516,6 +516,7 @@ class IColumn; M(Bool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ + M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). 
If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ @@ -802,7 +803,6 @@ class IColumn; M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \ M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \ M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \ - M(UInt64, input_format_parquet_max_block_size, 8192, "Max block size for parquet reader.", 0) \ M(Bool, input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format Protobuf", 0) \ M(Bool, input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format CapnProto", 0) \ M(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format ORC", 0) \ @@ -826,6 +826,8 @@ class IColumn; M(UInt64, input_format_csv_skip_first_lines, 0, "Skip specified number of lines at the beginning of data in CSV format", 0) \ M(UInt64, input_format_tsv_skip_first_lines, 0, "Skip specified number of lines at the beginning of data in TSV format", 0) \ \ + M(Bool, input_format_native_allow_types_conversion, true, "Allow data types conversion in Native input format", 0) \ + \ M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \ M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \ \ @@ -864,6 +866,7 @@ class IColumn; M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \ M(Bool, output_format_parquet_fixed_string_as_fixed_byte_array, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns.", 0) \ M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \ + M(ParquetCompression, output_format_parquet_compression_method, "lz4", "Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. 
Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ @@ -906,8 +909,10 @@ class IColumn; M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \ M(Bool, output_format_arrow_string_as_string, false, "Use Arrow String type instead of Binary for String columns", 0) \ M(Bool, output_format_arrow_fixed_string_as_fixed_byte_array, true, "Use Arrow FIXED_SIZE_BINARY type instead of Binary for FixedString columns.", 0) \ + M(ArrowCompression, output_format_arrow_compression_method, "lz4_frame", "Compression method for Arrow output format. Supported codecs: lz4_frame, zstd, none (uncompressed)", 0) \ \ M(Bool, output_format_orc_string_as_string, false, "Use ORC String type instead of Binary for String columns", 0) \ + M(ORCCompression, output_format_orc_compression_method, "lz4", "Compression method for ORC output format. Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)", 0) \ \ M(EnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::EnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \ \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 04f328bb665..caf18cf8fb8 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -81,7 +81,11 @@ namespace SettingsChangesHistory static std::map settings_changes_history = { {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}}}, + {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, + {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, + {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, + {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, + {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}}}, {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index b04757ecaa2..e0f16ea00db 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -158,7 +158,7 @@ IMPLEMENT_SETTING_ENUM(EscapingRule, ErrorCodes::BAD_ARGUMENTS, {"XML", FormatSettings::EscapingRule::XML}, {"Raw", FormatSettings::EscapingRule::Raw}}) -IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, +IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS, {{"bin", FormatSettings::MsgPackUUIDRepresentation::BIN}, {"str", FormatSettings::MsgPackUUIDRepresentation::STR}, {"ext", 
FormatSettings::MsgPackUUIDRepresentation::EXT}}) @@ -176,11 +176,30 @@ IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS, {"pread", LocalFSReadMethod::pread}, {"read", LocalFSReadMethod::read}}) - IMPLEMENT_SETTING_ENUM_WITH_RENAME(ParquetVersion, ErrorCodes::BAD_ARGUMENTS, {{"1.0", FormatSettings::ParquetVersion::V1_0}, {"2.4", FormatSettings::ParquetVersion::V2_4}, {"2.6", FormatSettings::ParquetVersion::V2_6}, {"2.latest", FormatSettings::ParquetVersion::V2_LATEST}}) +IMPLEMENT_SETTING_ENUM(ParquetCompression, ErrorCodes::BAD_ARGUMENTS, + {{"none", FormatSettings::ParquetCompression::NONE}, + {"snappy", FormatSettings::ParquetCompression::SNAPPY}, + {"zstd", FormatSettings::ParquetCompression::ZSTD}, + {"gzip", FormatSettings::ParquetCompression::GZIP}, + {"lz4", FormatSettings::ParquetCompression::LZ4}, + {"brotli", FormatSettings::ParquetCompression::BROTLI}}) + +IMPLEMENT_SETTING_ENUM(ArrowCompression, ErrorCodes::BAD_ARGUMENTS, + {{"none", FormatSettings::ArrowCompression::NONE}, + {"lz4_frame", FormatSettings::ArrowCompression::LZ4_FRAME}, + {"zstd", FormatSettings::ArrowCompression::ZSTD}}) + +IMPLEMENT_SETTING_ENUM(ORCCompression, ErrorCodes::BAD_ARGUMENTS, + {{"none", FormatSettings::ORCCompression::NONE}, + {"snappy", FormatSettings::ORCCompression::SNAPPY}, + {"zstd", FormatSettings::ORCCompression::ZSTD}, + {"zlib", FormatSettings::ORCCompression::ZLIB}, + {"lz4", FormatSettings::ORCCompression::LZ4}}) + } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index ae9456cc6d7..3ae7bfaa673 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -194,6 +194,12 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule) DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation) +DECLARE_SETTING_ENUM_WITH_RENAME(ParquetCompression, FormatSettings::ParquetCompression) + +DECLARE_SETTING_ENUM_WITH_RENAME(ArrowCompression, FormatSettings::ArrowCompression) + +DECLARE_SETTING_ENUM_WITH_RENAME(ORCCompression, FormatSettings::ORCCompression) + enum class Dialect { clickhouse, diff --git a/src/DataTypes/DataTypeLowCardinality.h b/src/DataTypes/DataTypeLowCardinality.h index 57f67ddad7a..d301a0f5443 100644 --- a/src/DataTypes/DataTypeLowCardinality.h +++ b/src/DataTypes/DataTypeLowCardinality.h @@ -86,6 +86,6 @@ DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type); ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column); /// Convert column of type from_type to type to_type by converting nested LowCardinality columns. 
-ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type); +ColumnPtr recursiveLowCardinalityTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type); } diff --git a/src/DataTypes/DataTypeLowCardinalityHelpers.cpp b/src/DataTypes/DataTypeLowCardinalityHelpers.cpp index 8a61afee420..98eb76267a4 100644 --- a/src/DataTypes/DataTypeLowCardinalityHelpers.cpp +++ b/src/DataTypes/DataTypeLowCardinalityHelpers.cpp @@ -113,7 +113,7 @@ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column) return column; } -ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type) +ColumnPtr recursiveLowCardinalityTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type) { if (!column) return column; @@ -128,7 +128,7 @@ ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr & if (const auto * column_const = typeid_cast(column.get())) { const auto & nested = column_const->getDataColumnPtr(); - auto nested_no_lc = recursiveTypeConversion(nested, from_type, to_type); + auto nested_no_lc = recursiveLowCardinalityTypeConversion(nested, from_type, to_type); if (nested.get() == nested_no_lc.get()) return column; @@ -164,7 +164,7 @@ ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr & const auto & nested_to = to_array_type->getNestedType(); return ColumnArray::create( - recursiveTypeConversion(column_array->getDataPtr(), nested_from, nested_to), + recursiveLowCardinalityTypeConversion(column_array->getDataPtr(), nested_from, nested_to), column_array->getOffsetsPtr()); } } @@ -187,7 +187,7 @@ ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr & for (size_t i = 0; i < columns.size(); ++i) { auto & element = columns[i]; - auto element_no_lc = recursiveTypeConversion(element, from_elements.at(i), to_elements.at(i)); + auto element_no_lc = recursiveLowCardinalityTypeConversion(element, from_elements.at(i), to_elements.at(i)); if (element.get() != element_no_lc.get()) { element = element_no_lc; diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index aca3166a8c4..a6db9d5ba0d 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -117,7 +117,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference; format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string; format_settings.parquet.output_fixed_string_as_fixed_byte_array = settings.output_format_parquet_fixed_string_as_fixed_byte_array; - format_settings.parquet.max_block_size = settings.input_format_parquet_max_block_size; + format_settings.parquet.output_compression_method = settings.output_format_parquet_compression_method; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? 
FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width; @@ -158,6 +158,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching; format_settings.arrow.output_string_as_string = settings.output_format_arrow_string_as_string; format_settings.arrow.output_fixed_string_as_fixed_byte_array = settings.output_format_arrow_fixed_string_as_fixed_byte_array; + format_settings.arrow.output_compression_method = settings.output_format_arrow_compression_method; format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; @@ -168,6 +169,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching; format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string; + format_settings.orc.output_compression_method = settings.output_format_orc_compression_method; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.capn_proto.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference; @@ -191,6 +193,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string; format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_bson_skip_fields_with_unsupported_types_in_schema_inference; format_settings.max_binary_string_size = settings.format_binary_max_string_size; + format_settings.native.allow_types_conversion = settings.input_format_native_allow_types_conversion; format_settings.max_parser_depth = context->getSettingsRef().max_parser_depth; format_settings.client_protocol_version = context->getClientProtocolVersion(); diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index d1755a35c5f..ef6be805bea 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -86,6 +86,13 @@ struct FormatSettings UInt64 max_parser_depth = DBMS_DEFAULT_MAX_PARSER_DEPTH; + enum class ArrowCompression + { + NONE, + LZ4_FRAME, + ZSTD + }; + struct { UInt64 row_group_size = 1000000; @@ -96,6 +103,7 @@ struct FormatSettings bool case_insensitive_column_matching = false; bool output_string_as_string = false; bool output_fixed_string_as_fixed_byte_array = true; + ArrowCompression output_compression_method = ArrowCompression::NONE; } arrow; struct @@ -183,6 +191,16 @@ struct FormatSettings V2_LATEST, }; + enum class ParquetCompression + { + NONE, + SNAPPY, + ZSTD, + LZ4, + GZIP, + BROTLI, + }; + struct { UInt64 row_group_size = 1000000; @@ -193,8 +211,8 @@ struct 
FormatSettings std::unordered_set skip_row_groups = {}; bool output_string_as_string = false; bool output_fixed_string_as_fixed_byte_array = true; - UInt64 max_block_size = 8192; ParquetVersion output_version; + ParquetCompression output_compression_method = ParquetCompression::SNAPPY; } parquet; struct Pretty @@ -276,6 +294,15 @@ struct FormatSettings bool accurate_types_of_literals = true; } values; + enum class ORCCompression + { + NONE, + LZ4, + SNAPPY, + ZSTD, + ZLIB, + }; + struct { bool import_nested = false; @@ -285,6 +312,7 @@ struct FormatSettings bool case_insensitive_column_matching = false; std::unordered_set skip_stripes = {}; bool output_string_as_string = false; + ORCCompression output_compression_method = ORCCompression::NONE; } orc; /// For capnProto format we should determine how to @@ -335,6 +363,11 @@ struct FormatSettings bool output_string_as_string; bool skip_fields_with_unsupported_types_in_schema_inference; } bson; + + struct + { + bool allow_types_conversion = true; + } native; }; } diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index 9f8d4ba1930..eca88a41c13 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -15,6 +15,8 @@ #include #include +#include + namespace DB { @@ -39,12 +41,14 @@ NativeReader::NativeReader( UInt64 server_revision_, bool skip_unknown_columns_, bool null_as_default_, + bool allow_types_conversion_, BlockMissingValues * block_missing_values_) : istr(istr_) , header(header_) , server_revision(server_revision_) , skip_unknown_columns(skip_unknown_columns_) , null_as_default(null_as_default_) + , allow_types_conversion(allow_types_conversion_) , block_missing_values(block_missing_values_) { } @@ -204,11 +208,31 @@ Block NativeReader::read() if (null_as_default) insertNullAsDefaultIfNeeded(column, header_column, header.getPositionByName(column.name), block_missing_values); - /// Support insert from old clients without low cardinality type. if (!header_column.type->equals(*column.type)) { - column.column = recursiveTypeConversion(column.column, column.type, header.safeGetByPosition(i).type); - column.type = header.safeGetByPosition(i).type; + if (allow_types_conversion) + { + try + { + column.column = castColumn(column, header_column.type); + } + catch (Exception & e) + { + e.addMessage(fmt::format( + "while converting column \"{}\" from type {} to type {}", + column.name, + column.type->getName(), + header_column.type->getName())); + throw; + } + } + else + { + /// Support insert from old clients without low cardinality type. + column.column = recursiveLowCardinalityTypeConversion(column.column, column.type, header_column.type); + } + + column.type = header_column.type; } } else diff --git a/src/Formats/NativeReader.h b/src/Formats/NativeReader.h index 2d8b16e06eb..3cec4afd997 100644 --- a/src/Formats/NativeReader.h +++ b/src/Formats/NativeReader.h @@ -30,6 +30,7 @@ public: UInt64 server_revision_, bool skip_unknown_columns_ = false, bool null_as_default_ = false, + bool allow_types_conversion_ = false, BlockMissingValues * block_missing_values_ = nullptr); /// For cases when we have an index. It allows to skip columns. Only columns specified in the index will be read. 
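Aside: the NativeReader hunk above replaces the old unconditional LowCardinality-only adjustment with a choice between a full cast to the header type and the legacy behaviour, driven by the new `input_format_native_allow_types_conversion` setting (default `true` as of 23.3 per the settings-changes history earlier in this diff). The following standalone C++ sketch uses hypothetical simplified types (not the real ClickHouse `ColumnWithTypeAndName`/`castColumn` API) purely to illustrate the branching and the error-context wrapping the patch introduces:

```cpp
#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for a column with a name and a type.
struct Column { std::string name; std::string type; };

// Stand-in for a full cast to the header type (may fail).
Column castToHeaderType(const Column & col, const std::string & header_type)
{
    if (header_type == "unconvertible")  // simulate an impossible conversion
        throw std::runtime_error("Cannot convert");
    return {col.name, header_type};
}

// Stand-in for the legacy path that only reconciled LowCardinality wrappers.
Column convertLowCardinalityOnly(const Column & col, const std::string & header_type)
{
    return {col.name, header_type};
}

// Mirrors the new branch in NativeReader::read(): try a full cast when the
// setting allows it, otherwise fall back to the old LowCardinality-only path.
Column adjustToHeader(const Column & col, const std::string & header_type, bool allow_types_conversion)
{
    if (col.type == header_type)
        return col;

    if (allow_types_conversion)
    {
        try
        {
            return castToHeaderType(col, header_type);
        }
        catch (const std::exception & e)
        {
            // The patch adds similar context to the exception before rethrowing.
            throw std::runtime_error(std::string(e.what()) + " while converting column \"" + col.name
                + "\" from type " + col.type + " to type " + header_type);
        }
    }

    return convertLowCardinalityOnly(col, header_type);
}

int main()
{
    Column c{"id", "UInt32"};
    std::cout << adjustToHeader(c, "UInt64", /*allow_types_conversion=*/ true).type << '\n';  // prints UInt64
}
```

Setting `input_format_native_allow_types_conversion = 0` appears to restore the pre-23.3 behaviour, where only LowCardinality wrappers were reconciled between the incoming block and the table header.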
@@ -51,6 +52,7 @@ private: UInt64 server_revision; bool skip_unknown_columns = false; bool null_as_default = false; + bool allow_types_conversion = false; BlockMissingValues * block_missing_values = nullptr; bool use_index = false; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 00eb686385d..011860948c3 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -984,13 +984,16 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting if (tryReadIntText(tmp_int, buf) && buf.eof()) return std::make_shared(); + /// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof. + buf.position() = buf.buffer().begin(); + /// In case of Int64 overflow, try to infer UInt64 UInt64 tmp_uint; if (tryReadIntText(tmp_uint, buf) && buf.eof()) return std::make_shared(); } - /// We cam safely get back to the start of buffer, because we read from a string and we didn't reach eof. + /// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof. buf.position() = buf.buffer().begin(); Float64 tmp; diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index 240732965f4..fa7dda82e1c 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -38,6 +38,8 @@ public: String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return false; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { diff --git a/src/Functions/URL/decodeURLComponent.cpp b/src/Functions/URL/decodeURLComponent.cpp index 7d98ccd63a0..05e3fbea3fd 100644 --- a/src/Functions/URL/decodeURLComponent.cpp +++ b/src/Functions/URL/decodeURLComponent.cpp @@ -14,28 +14,33 @@ namespace ErrorCodes static size_t encodeURL(const char * __restrict src, size_t src_size, char * __restrict dst, bool space_as_plus) { char * dst_pos = dst; - for (size_t i = 0; i < src_size; i++) + for (size_t i = 0; i < src_size; ++i) { if ((src[i] >= '0' && src[i] <= '9') || (src[i] >= 'a' && src[i] <= 'z') || (src[i] >= 'A' && src[i] <= 'Z') || src[i] == '-' || src[i] == '_' || src[i] == '.' || src[i] == '~') { - *dst_pos++ = src[i]; + *dst_pos = src[i]; + ++dst_pos; } else if (src[i] == ' ' && space_as_plus) { - *dst_pos++ = '+'; + *dst_pos = '+'; + ++dst_pos; } else { - *dst_pos++ = '%'; - *dst_pos++ = hexDigitUppercase(src[i] >> 4); - *dst_pos++ = hexDigitUppercase(src[i] & 0xf); + dst_pos[0] = '%'; + ++dst_pos; + writeHexByteUppercase(src[i], dst_pos); + dst_pos += 2; } } - *dst_pos++ = src[src_size]; + *dst_pos = 0; + ++dst_pos; return dst_pos - dst; } + /// We assume that size of the dst buf isn't less than src_size. 
static size_t decodeURL(const char * __restrict src, size_t src_size, char * __restrict dst, bool plus_as_space) { @@ -120,10 +125,14 @@ struct CodeURLComponentImpl ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { if (code_strategy == encode) - //the destination(res_data) string is at most three times the length of the source string + { + /// the destination(res_data) string is at most three times the length of the source string res_data.resize(data.size() * 3); + } else + { res_data.resize(data.size()); + } size_t size = offsets.size(); res_offsets.resize(size); diff --git a/src/Functions/array/arrayReverse.cpp b/src/Functions/array/arrayReverse.cpp index 912adbadc7c..654a6c4cebf 100644 --- a/src/Functions/array/arrayReverse.cpp +++ b/src/Functions/array/arrayReverse.cpp @@ -91,6 +91,8 @@ ColumnPtr FunctionArrayReverse::executeImpl(const ColumnsWithTypeAndName & argum || executeFixedString(*src_inner_col, offsets, *res_inner_col) || executeGeneric(*src_inner_col, offsets, *res_inner_col); + chassert(bool(src_nullable_col) == bool(res_nullable_col)); + if (src_nullable_col) if (!executeNumber(src_nullable_col->getNullMapColumn(), offsets, res_nullable_col->getNullMapColumn())) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of null map of the first argument of function {}", diff --git a/src/Functions/runningDifference.h b/src/Functions/runningDifference.h index 154370d4cd9..f1ec4f9e523 100644 --- a/src/Functions/runningDifference.h +++ b/src/Functions/runningDifference.h @@ -70,7 +70,7 @@ private: if (!has_prev_value) { - dst[i] = is_first_line_zero ? 0 : src[i]; + dst[i] = is_first_line_zero ? static_cast(0) : static_cast(src[i]); prev = src[i]; has_prev_value = true; } @@ -102,6 +102,10 @@ private: f(UInt32()); else if (which.isUInt64()) f(UInt64()); + else if (which.isUInt128()) + f(UInt128()); + else if (which.isUInt256()) + f(UInt256()); else if (which.isInt8()) f(Int8()); else if (which.isInt16()) @@ -110,6 +114,10 @@ private: f(Int32()); else if (which.isInt64()) f(Int64()); + else if (which.isInt128()) + f(Int128()); + else if (which.isInt256()) + f(Int256()); else if (which.isFloat32()) f(Float32()); else if (which.isFloat64()) diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 5c5285c5260..ed25c101d7e 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -259,12 +259,12 @@ namespace detail { try { - callWithRedirects(response, Poco::Net::HTTPRequest::HTTP_HEAD); + callWithRedirects(response, Poco::Net::HTTPRequest::HTTP_HEAD, true); break; } catch (const Poco::Exception & e) { - if (i == settings.http_max_tries - 1) + if (i == settings.http_max_tries - 1 || !isRetriableError(response.getStatus())) throw; LOG_ERROR(log, "Failed to make HTTP_HEAD request to {}. 
Error: {}", uri.toString(), e.displayText()); @@ -353,11 +353,12 @@ namespace detail static bool isRetriableError(const Poco::Net::HTTPResponse::HTTPStatus http_status) noexcept { - constexpr std::array non_retriable_errors{ + static constexpr std::array non_retriable_errors{ Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST, Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED, Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND, Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_IMPLEMENTED, Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED}; return std::all_of( diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 78b173de6dc..590cbc9ba83 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -104,10 +103,9 @@ bool AsynchronousInsertQueue::InsertQuery::operator==(const InsertQuery & other) return query_str == other.query_str && settings == other.settings; } -AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_, MemoryTracker * user_memory_tracker_) +AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_) : bytes(std::move(bytes_)) , query_id(std::move(query_id_)) - , user_memory_tracker(user_memory_tracker_) , create_time(std::chrono::system_clock::now()) { } @@ -236,7 +234,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) if (auto quota = query_context->getQuota()) quota->used(QuotaType::WRITTEN_BYTES, bytes.size()); - auto entry = std::make_shared(std::move(bytes), query_context->getCurrentQueryId(), CurrentThread::getUserMemoryTracker()); + auto entry = std::make_shared(std::move(bytes), query_context->getCurrentQueryId()); InsertQuery key{query, settings}; InsertDataPtr data_to_process; diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index e6b7bff8d26..23a2860364d 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include @@ -60,31 +59,6 @@ private: UInt128 calculateHash() const; }; - struct UserMemoryTrackerSwitcher - { - explicit UserMemoryTrackerSwitcher(MemoryTracker * new_tracker) - { - auto * thread_tracker = CurrentThread::getMemoryTracker(); - prev_untracked_memory = current_thread->untracked_memory; - prev_memory_tracker_parent = thread_tracker->getParent(); - - current_thread->untracked_memory = 0; - thread_tracker->setParent(new_tracker); - } - - ~UserMemoryTrackerSwitcher() - { - CurrentThread::flushUntrackedMemory(); - auto * thread_tracker = CurrentThread::getMemoryTracker(); - - current_thread->untracked_memory = prev_untracked_memory; - thread_tracker->setParent(prev_memory_tracker_parent); - } - - MemoryTracker * prev_memory_tracker_parent; - Int64 prev_untracked_memory; - }; - struct InsertData { struct Entry @@ -92,10 +66,9 @@ private: public: const String bytes; const String query_id; - MemoryTracker * const user_memory_tracker; const std::chrono::time_point create_time; - Entry(String && bytes_, String && query_id_, MemoryTracker * user_memory_tracker_); + Entry(String && bytes_, String && query_id_); void finish(std::exception_ptr exception_ = nullptr); std::future getFuture() { return promise.get_future(); } @@ -106,19 +79,6 @@ private: 
std::atomic_bool finished = false; }; - ~InsertData() - { - auto it = entries.begin(); - // Entries must be destroyed in context of user who runs async insert. - // Each entry in the list may correspond to a different user, - // so we need to switch current thread's MemoryTracker parent on each iteration. - while (it != entries.end()) - { - UserMemoryTrackerSwitcher switcher((*it)->user_memory_tracker); - it = entries.erase(it); - } - } - using EntryPtr = std::shared_ptr; std::list entries; diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 8010b9c682b..c20379e1fc1 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -34,7 +34,7 @@ IFileCachePriority::WriteIterator LRUFileCachePriority::add(const Key & key, siz CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size); CurrentMetrics::add(CurrentMetrics::FilesystemCacheElements); - LOG_TRACE(log, "Added entry into LRU queue, key: {}, offset: {}", key.toString(), offset); + LOG_TEST(log, "Added entry into LRU queue, key: {}, offset: {}", key.toString(), offset); return std::make_shared(this, iter); } @@ -54,7 +54,7 @@ void LRUFileCachePriority::removeAll(std::lock_guard &) CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, cache_size); CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements, queue.size()); - LOG_TRACE(log, "Removed all entries from LRU queue"); + LOG_TEST(log, "Removed all entries from LRU queue"); queue.clear(); cache_size = 0; @@ -88,7 +88,7 @@ void LRUFileCachePriority::LRUFileCacheIterator::removeAndGetNext(std::lock_guar CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, queue_iter->size); CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements); - LOG_TRACE(cache_priority->log, "Removed entry from LRU queue, key: {}, offset: {}", queue_iter->key.toString(), queue_iter->offset); + LOG_TEST(cache_priority->log, "Removed entry from LRU queue, key: {}, offset: {}", queue_iter->key.toString(), queue_iter->offset); queue_iter = cache_priority->queue.erase(queue_iter); } diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 7179ce94e0b..f70c0010585 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -169,11 +169,11 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r return {}; } - auto write_error_status = [&](const String & host_id, const String & error_message, const String & reason) + auto write_error_status = [&](const String & host_id, const ExecutionStatus & status, const String & reason) { - LOG_ERROR(log, "Cannot parse DDL task {}: {}. Will try to send error status: {}", entry_name, reason, error_message); + LOG_ERROR(log, "Cannot parse DDL task {}: {}. Will try to send error status: {}", entry_name, reason, status.message); createStatusDirs(entry_path, zookeeper); - zookeeper->tryCreate(fs::path(entry_path) / "finished" / host_id, error_message, zkutil::CreateMode::Persistent); + zookeeper->tryCreate(fs::path(entry_path) / "finished" / host_id, status.serializeText(), zkutil::CreateMode::Persistent); }; try @@ -187,7 +187,7 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r /// We can try to create fail node using FQDN if it equal to host name in cluster config attempt will be successful. /// Otherwise, that node will be ignored by DDLQueryStatusSource. 
out_reason = "Incorrect task format"; - write_error_status(host_fqdn_id, ExecutionStatus::fromCurrentException().serializeText(), out_reason); + write_error_status(host_fqdn_id, ExecutionStatus::fromCurrentException(), out_reason); return {}; } @@ -212,7 +212,7 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r catch (...) { out_reason = "Cannot parse query or obtain cluster info"; - write_error_status(task->host_id_str, ExecutionStatus::fromCurrentException().serializeText(), out_reason); + write_error_status(task->host_id_str, ExecutionStatus::fromCurrentException(), out_reason); return {}; } @@ -542,6 +542,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ , task.entry.tracing_context, this->context->getOpenTelemetrySpanLog()); + tracing_ctx_holder.root_span.kind = OpenTelemetry::CONSUMER; String active_node_path = task.getActiveNodePath(); String finished_node_path = task.getFinishedNodePath(); @@ -650,7 +651,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) bool status_written_by_table_or_db = task.ops.empty(); if (status_written_by_table_or_db) { - throw Exception(ErrorCodes::UNFINISHED, "Unexpected error: {}", task.execution_status.serializeText()); + throw Exception(ErrorCodes::UNFINISHED, "Unexpected error: {}", task.execution_status.message); } else { diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 2b88ff6a353..67aace815dc 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1089,6 +1089,7 @@ static std::shared_ptr chooseJoinAlgorithm( if (MergeJoin::isSupported(analyzed_join)) return std::make_shared(analyzed_join, right_sample_block); + return std::make_shared(analyzed_join, right_sample_block); } throw Exception(ErrorCodes::NOT_IMPLEMENTED, diff --git a/src/Interpreters/FullSortingMergeJoin.h b/src/Interpreters/FullSortingMergeJoin.h index fa7d0478535..a94d7a7dfc6 100644 --- a/src/Interpreters/FullSortingMergeJoin.h +++ b/src/Interpreters/FullSortingMergeJoin.h @@ -44,6 +44,10 @@ public: const auto & on_expr = table_join->getOnlyClause(); bool support_conditions = !on_expr.on_filter_condition_left && !on_expr.on_filter_condition_right; + if (!on_expr.analyzer_left_filter_condition_column_name.empty() || + !on_expr.analyzer_right_filter_condition_column_name.empty()) + support_conditions = false; + /// Key column can change nullability and it's not handled on type conversion stage, so algorithm should be aware of it bool support_using_and_nulls = !table_join->hasUsing() || !table_join->joinUseNulls(); diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index 0536ee10f7c..98f70c25dcd 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -226,6 +226,12 @@ BlockIO InterpreterSelectQueryAnalyzer::execute() return result; } +QueryPlan & InterpreterSelectQueryAnalyzer::getQueryPlan() +{ + planner.buildQueryPlanIfNeeded(); + return planner.getQueryPlan(); +} + QueryPlan && InterpreterSelectQueryAnalyzer::extractQueryPlan() && { planner.buildQueryPlanIfNeeded(); diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.h b/src/Interpreters/InterpreterSelectQueryAnalyzer.h index 681a9cfe5a3..2c8af49cf0e 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.h +++ 
b/src/Interpreters/InterpreterSelectQueryAnalyzer.h @@ -51,6 +51,8 @@ public: BlockIO execute() override; + QueryPlan & getQueryPlan(); + QueryPlan && extractQueryPlan() &&; QueryPipelineBuilder buildQueryPipeline(); diff --git a/src/Interpreters/OpenTelemetrySpanLog.cpp b/src/Interpreters/OpenTelemetrySpanLog.cpp index 63b8ae406a6..257c64e1df8 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.cpp +++ b/src/Interpreters/OpenTelemetrySpanLog.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -20,11 +21,23 @@ namespace DB NamesAndTypesList OpenTelemetrySpanLogElement::getNamesAndTypes() { + auto span_kind_type = std::make_shared( + DataTypeEnum8::Values + { + {"INTERNAL", static_cast(OpenTelemetry::INTERNAL)}, + {"SERVER", static_cast(OpenTelemetry::SERVER)}, + {"CLIENT", static_cast(OpenTelemetry::CLIENT)}, + {"PRODUCER", static_cast(OpenTelemetry::PRODUCER)}, + {"CONSUMER", static_cast(OpenTelemetry::CONSUMER)} + } + ); + return { {"trace_id", std::make_shared()}, {"span_id", std::make_shared()}, {"parent_span_id", std::make_shared()}, {"operation_name", std::make_shared()}, + {"kind", std::move(span_kind_type)}, // DateTime64 is really unwieldy -- there is no "normal" way to convert // it to an UInt64 count of microseconds, except: // 1) reinterpretAsUInt64(reinterpretAsFixedString(date)), which just @@ -59,6 +72,7 @@ void OpenTelemetrySpanLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(span_id); columns[i++]->insert(parent_span_id); columns[i++]->insert(operation_name); + columns[i++]->insert(kind); columns[i++]->insert(start_time_us); columns[i++]->insert(finish_time_us); columns[i++]->insert(DateLUT::instance().toDayNum(finish_time_us / 1000000).toUnderType()); diff --git a/src/Interpreters/OptimizeIfChains.cpp b/src/Interpreters/OptimizeIfChains.cpp index ba4c7bcd95f..9a5f9bcb2e1 100644 --- a/src/Interpreters/OptimizeIfChains.cpp +++ b/src/Interpreters/OptimizeIfChains.cpp @@ -64,6 +64,7 @@ ASTs OptimizeIfChainsVisitor::ifChain(const ASTPtr & child) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST for function 'if'"); const auto * function_args = function_node->arguments->as(); + chassert(function_args); if (!function_args || function_args->children.size() != 3) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/src/Interpreters/ReplaceQueryParameterVisitor.cpp index f271de26ca4..893c93f0950 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -50,7 +50,16 @@ void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) void ReplaceQueryParameterVisitor::visitChildren(ASTPtr & ast) { for (auto & child : ast->children) + { + void * old_ptr = child.get(); visit(child); + void * new_ptr = child.get(); + + /// Some AST classes have naked pointers to children elements as members. + /// We have to replace them if the child was replaced. + if (new_ptr != old_ptr) + ast->updatePointerToChild(old_ptr, new_ptr); + } } const String & ReplaceQueryParameterVisitor::getParamValue(const String & name) @@ -89,6 +98,7 @@ void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) literal = value; else literal = temp_column[0]; + ast = addTypeConversionToAST(std::make_shared(literal), type_name); /// Keep the original alias. 
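Aside: the `visitChildren` change just above pairs with the `IAST::forEachPointerToChild` / `updatePointerToChild` hooks added further down in this diff. When query-parameter substitution replaces a child AST node, any raw member pointer that still aliased the old child must be re-pointed at the replacement. A minimal standalone C++ sketch of that pointer-fixup pattern (hypothetical `Node` type, not the real IAST hierarchy) follows:

```cpp
#include <functional>
#include <iostream>
#include <memory>
#include <vector>

struct Node
{
    int value = 0;
    std::vector<std::shared_ptr<Node>> children;

    // Raw, non-owning alias into `children`, like the naked member pointers in AST classes.
    Node * special_child = nullptr;

    // Analogue of IAST::forEachPointerToChild: expose every raw member pointer.
    virtual void forEachPointerToChild(std::function<void(void **)> f)
    {
        f(reinterpret_cast<void **>(&special_child));
    }

    // Analogue of IAST::updatePointerToChild.
    void updatePointerToChild(void * old_ptr, void * new_ptr)
    {
        forEachPointerToChild([old_ptr, new_ptr](void ** ptr)
        {
            if (*ptr == old_ptr)
                *ptr = new_ptr;
        });
    }

    virtual ~Node() = default;
};

// Analogue of ReplaceQueryParameterVisitor::visitChildren: visit each child and,
// if the visit replaced it, re-point any raw member pointers to the new node.
void replaceChildren(Node & parent, const std::function<void(std::shared_ptr<Node> &)> & visit)
{
    for (auto & child : parent.children)
    {
        void * old_ptr = child.get();
        visit(child);
        void * new_ptr = child.get();
        if (new_ptr != old_ptr)
            parent.updatePointerToChild(old_ptr, new_ptr);
    }
}

int main()
{
    Node parent;
    parent.children.push_back(std::make_shared<Node>());
    parent.special_child = parent.children[0].get();

    replaceChildren(parent, [](std::shared_ptr<Node> & child)
    {
        auto replacement = std::make_shared<Node>();
        replacement->value = 42;
        child = replacement;  // the old node is destroyed here
    });

    std::cout << parent.special_child->value << '\n';  // 42: the alias was fixed up
}
```

Without the fixup, `special_child` would dangle after substitution, which is exactly the hazard the `forEachPointerToChild` overrides added to the Parsers headers below are meant to avoid.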
diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 9d8547abcf2..78513920236 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -426,6 +426,8 @@ void SystemLog::flushImpl(const std::vector & to_flush, // we need query context to do inserts to target table with MV containing subqueries or joins auto insert_context = Context::createCopy(context); insert_context->makeQueryContext(); + /// We always want to deliver the data to the original table regardless of the MVs + insert_context->setSetting("materialized_views_ignore_errors", true); InterpreterInsertQuery interpreter(query_ptr, insert_context); BlockIO io = interpreter.execute(); diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 60d16eda6ba..c1063e25587 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -55,7 +55,7 @@ bool isSupportedAlterType(int type) BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, const DDLQueryOnClusterParams & params) { - OpenTelemetry::SpanHolder span(__FUNCTION__); + OpenTelemetry::SpanHolder span(__FUNCTION__, OpenTelemetry::PRODUCER); if (context->getCurrentTransaction() && context->getSettingsRef().throw_on_unsupported_query_inside_transaction) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ON CLUSTER queries inside transactions are not supported"); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 076b0c08d93..039580370c5 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -451,10 +451,24 @@ static std::tuple executeQueryImpl( /// Avoid early destruction of process_list_entry if it was not saved to `res` yet (in case of exception) ProcessList::EntryPtr process_list_entry; BlockIO res; - std::shared_ptr implicit_txn_control{}; + auto implicit_txn_control = std::make_shared(false); String query_database; String query_table; + auto execute_implicit_tcl_query = [implicit_txn_control](const ContextMutablePtr & query_context, ASTTransactionControl::QueryType tcl_type) + { + /// Unset the flag on COMMIT and ROLLBACK + SCOPE_EXIT({ if (tcl_type != ASTTransactionControl::BEGIN) *implicit_txn_control = false; }); + + ASTPtr tcl_ast = std::make_shared(tcl_type); + InterpreterTransactionControlQuery tc(tcl_ast, query_context); + tc.execute(); + + /// Set the flag after successful BIGIN + if (tcl_type == ASTTransactionControl::BEGIN) + *implicit_txn_control = true; + }; + try { if (auto txn = context->getCurrentTransaction()) @@ -674,14 +688,7 @@ static std::tuple executeQueryImpl( if (context->isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot create transactions"); - /// If there is no session (which is the default for the HTTP Handler), set up one just for this as it is necessary - /// to control the transaction lifetime - if (!context->hasSessionContext()) - context->makeSessionContext(); - - auto tc = std::make_shared(ast, context); - tc->executeBegin(context->getSessionContext()); - implicit_txn_control = std::move(tc); + execute_implicit_tcl_query(context, ASTTransactionControl::BEGIN); } catch (Exception & e) { @@ -949,6 +956,7 @@ static std::tuple executeQueryImpl( log_processors_profiles = settings.log_processors_profiles, status_info_to_query_log, implicit_txn_control, + execute_implicit_tcl_query, pulling_pipeline = pipeline.pulling(), query_span](QueryPipeline & query_pipeline) mutable 
{ @@ -1062,21 +1070,8 @@ static std::tuple executeQueryImpl( } } - if (implicit_txn_control) - { - try - { - implicit_txn_control->executeCommit(context->getSessionContext()); - implicit_txn_control.reset(); - } - catch (const Exception &) - { - /// An exception might happen when trying to commit the transaction. For example we might get an immediate exception - /// because ZK is down and wait_changes_become_visible_after_commit_mode == WAIT_UNKNOWN - implicit_txn_control.reset(); - throw; - } - } + if (*implicit_txn_control) + execute_implicit_tcl_query(context, ASTTransactionControl::COMMIT); } if (query_span) @@ -1104,13 +1099,11 @@ static std::tuple executeQueryImpl( quota(quota), status_info_to_query_log, implicit_txn_control, + execute_implicit_tcl_query, query_span](bool log_error) mutable { - if (implicit_txn_control) - { - implicit_txn_control->executeRollback(context->getSessionContext()); - implicit_txn_control.reset(); - } + if (*implicit_txn_control) + execute_implicit_tcl_query(context, ASTTransactionControl::ROLLBACK); else if (auto txn = context->getCurrentTransaction()) txn->onException(); @@ -1179,15 +1172,10 @@ static std::tuple executeQueryImpl( } catch (...) { - if (implicit_txn_control) - { - implicit_txn_control->executeRollback(context->getSessionContext()); - implicit_txn_control.reset(); - } + if (*implicit_txn_control) + execute_implicit_tcl_query(context, ASTTransactionControl::ROLLBACK); else if (auto txn = context->getCurrentTransaction()) - { txn->onException(); - } if (!internal) onExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 837d0f1789d..47ccff57419 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -22,6 +22,8 @@ #include #include +#define ORDINARY_TO_ATOMIC_PREFIX ".tmp_convert." + namespace fs = std::filesystem; namespace DB @@ -117,6 +119,37 @@ static void checkUnsupportedVersion(ContextMutablePtr context, const String & da "If so, you should upgrade through intermediate version.", database_name); } +static void checkIncompleteOrdinaryToAtomicConversion(ContextPtr context, const std::map & databases) +{ + if (context->getConfigRef().has("allow_reserved_database_name_tmp_convert")) + return; + + auto convert_flag_path = fs::path(context->getFlagsPath()) / "convert_ordinary_to_atomic"; + if (!fs::exists(convert_flag_path)) + return; + + /// Flag exists. Let's check if we had an unsuccessful conversion attempt previously + for (const auto & db : databases) + { + if (!db.first.starts_with(ORDINARY_TO_ATOMIC_PREFIX)) + continue; + size_t last_dot = db.first.rfind('.'); + if (last_dot <= strlen(ORDINARY_TO_ATOMIC_PREFIX)) + continue; + + String actual_name = db.first.substr(strlen(ORDINARY_TO_ATOMIC_PREFIX), last_dot - strlen(ORDINARY_TO_ATOMIC_PREFIX)); + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Found a database with special name: {}. " + "Most likely it indicates that conversion of database {} from Ordinary to Atomic " + "was interrupted or failed in the middle. You can add to config.xml " + "or remove convert_ordinary_to_atomic file from flags/ directory, so the server will start forcefully. 
" + "After starting the server, you can finish conversion manually by moving rest of the tables from {} to {} " + "(using RENAME TABLE) and executing DROP DATABASE {} and RENAME DATABASE {} TO {}", + backQuote(db.first), backQuote(actual_name), backQuote(actual_name), backQuote(db.first), + backQuote(actual_name), backQuote(db.first), backQuote(actual_name)); + } +} + void loadMetadata(ContextMutablePtr context, const String & default_database_name) { Poco::Logger * log = &Poco::Logger::get("loadMetadata"); @@ -168,6 +201,8 @@ void loadMetadata(ContextMutablePtr context, const String & default_database_nam } } + checkIncompleteOrdinaryToAtomicConversion(context, databases); + /// clickhouse-local creates DatabaseMemory as default database by itself /// For clickhouse-server we need create default database bool create_default_db_if_not_exists = !default_database_name.empty(); @@ -324,7 +359,7 @@ static void maybeConvertOrdinaryDatabaseToAtomic(ContextMutablePtr context, cons database_name, fmt::join(permanently_detached_tables, ", ")); } - String tmp_name = fmt::format(".tmp_convert.{}.{}", database_name, thread_local_rng()); + String tmp_name = fmt::format(ORDINARY_TO_ATOMIC_PREFIX"{}.{}", database_name, thread_local_rng()); try { @@ -415,11 +450,13 @@ void convertDatabasesEnginesIfNeed(ContextMutablePtr context) LOG_INFO(&Poco::Logger::get("loadMetadata"), "Found convert_ordinary_to_atomic file in flags directory, " "will try to convert all Ordinary databases to Atomic"); - fs::remove(convert_flag_path); for (const auto & [name, _] : DatabaseCatalog::instance().getDatabases()) if (name != DatabaseCatalog::SYSTEM_DATABASE) maybeConvertOrdinaryDatabaseToAtomic(context, name, /* tables_started */ true); + + LOG_INFO(&Poco::Logger::get("loadMetadata"), "Conversion finished, removing convert_ordinary_to_atomic flag"); + fs::remove(convert_flag_path); } void startupSystemTables() diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 2a48f5bbd9e..1400113fa9c 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -256,6 +256,11 @@ protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; bool isOneCommandTypeOnly(const ASTAlterCommand::Type & type) const; + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(&command_list)); + } }; } diff --git a/src/Parsers/ASTBackupQuery.h b/src/Parsers/ASTBackupQuery.h index a3e3a144c72..0201c2b14f9 100644 --- a/src/Parsers/ASTBackupQuery.h +++ b/src/Parsers/ASTBackupQuery.h @@ -94,5 +94,12 @@ public: void formatImpl(const FormatSettings & format, FormatState &, FormatStateStacked) const override; ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override; QueryKind getQueryKind() const override; + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(&backup_name)); + f(reinterpret_cast(&base_backup_name)); + } }; + } diff --git a/src/Parsers/ASTConstraintDeclaration.h b/src/Parsers/ASTConstraintDeclaration.h index 437aab1a82d..f48d7ef77fe 100644 --- a/src/Parsers/ASTConstraintDeclaration.h +++ b/src/Parsers/ASTConstraintDeclaration.h @@ -25,5 +25,11 @@ public: ASTPtr clone() const override; void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(&expr)); + } }; + } diff --git a/src/Parsers/ASTCreateQuery.cpp 
b/src/Parsers/ASTCreateQuery.cpp index 955ce62b0f7..e28e863c21f 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -91,6 +91,11 @@ public: ASTPtr clone() const override; void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(&elem)); + } }; ASTPtr ASTColumnsElement::clone() const diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 90a15e09369..230996f610e 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -32,6 +32,17 @@ public: void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; bool isExtendedStorageDefinition() const; + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(&engine)); + f(reinterpret_cast(&partition_by)); + f(reinterpret_cast(&primary_key)); + f(reinterpret_cast(&order_by)); + f(reinterpret_cast(&sample_by)); + f(reinterpret_cast(&ttl_table)); + f(reinterpret_cast(&settings)); + } }; @@ -57,6 +68,16 @@ public: return (!columns || columns->children.empty()) && (!indices || indices->children.empty()) && (!constraints || constraints->children.empty()) && (!projections || projections->children.empty()); } + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(&columns)); + f(reinterpret_cast(&indices)); + f(reinterpret_cast(&primary_key)); + f(reinterpret_cast(&constraints)); + f(reinterpret_cast(&projections)); + f(reinterpret_cast(&primary_key)); + } }; @@ -126,6 +147,19 @@ public: protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(&columns_list)); + f(reinterpret_cast(&inner_storage)); + f(reinterpret_cast(&storage)); + f(reinterpret_cast(&as_table_function)); + f(reinterpret_cast(&select)); + f(reinterpret_cast(&comment)); + f(reinterpret_cast(&table_overrides)); + f(reinterpret_cast(&dictionary_attributes_list)); + f(reinterpret_cast(&dictionary)); + } }; } diff --git a/src/Parsers/ASTDictionary.h b/src/Parsers/ASTDictionary.h index 3611621b8ad..8c332247d52 100644 --- a/src/Parsers/ASTDictionary.h +++ b/src/Parsers/ASTDictionary.h @@ -47,6 +47,11 @@ public: ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(¶meters)); + } }; diff --git a/src/Parsers/ASTExternalDDLQuery.h b/src/Parsers/ASTExternalDDLQuery.h index 7913d44b970..96600b07f29 100644 --- a/src/Parsers/ASTExternalDDLQuery.h +++ b/src/Parsers/ASTExternalDDLQuery.h @@ -41,6 +41,11 @@ public: } QueryKind getQueryKind() const override { return QueryKind::ExternalDDL; } + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(&from)); + } }; } diff --git a/src/Parsers/ASTFunctionWithKeyValueArguments.h b/src/Parsers/ASTFunctionWithKeyValueArguments.h index 67d591dfcdc..75a8ae0415e 100644 --- a/src/Parsers/ASTFunctionWithKeyValueArguments.h +++ b/src/Parsers/ASTFunctionWithKeyValueArguments.h @@ -33,6 +33,11 @@ public: bool hasSecretParts() const override; void updateTreeHashImpl(SipHash & hash_state) const override; + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(&second)); + } }; diff --git 
a/src/Parsers/ASTIndexDeclaration.h b/src/Parsers/ASTIndexDeclaration.h index e22c1da4489..bd52a611f3f 100644 --- a/src/Parsers/ASTIndexDeclaration.h +++ b/src/Parsers/ASTIndexDeclaration.h @@ -23,6 +23,12 @@ public: ASTPtr clone() const override; void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(&expr)); + f(reinterpret_cast(&type)); + } }; } diff --git a/src/Parsers/ASTProjectionDeclaration.h b/src/Parsers/ASTProjectionDeclaration.h index 53c681c3ec1..df7a7c832a6 100644 --- a/src/Parsers/ASTProjectionDeclaration.h +++ b/src/Parsers/ASTProjectionDeclaration.h @@ -18,6 +18,11 @@ public: ASTPtr clone() const override; void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(&query)); + } }; } diff --git a/src/Parsers/ASTTableOverrides.h b/src/Parsers/ASTTableOverrides.h index c47260789d8..1df267acaa9 100644 --- a/src/Parsers/ASTTableOverrides.h +++ b/src/Parsers/ASTTableOverrides.h @@ -27,6 +27,12 @@ public: String getID(char) const override { return "TableOverride " + table_name; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(&columns)); + f(reinterpret_cast(&storage)); + } }; /// List of table overrides, for example: diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index 627b1174b33..5928506aa5b 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -175,6 +175,16 @@ public: field = nullptr; } + /// After changing one of `children` elements, update the corresponding member pointer if needed. + void updatePointerToChild(void * old_ptr, void * new_ptr) + { + forEachPointerToChild([old_ptr, new_ptr](void ** ptr) mutable + { + if (*ptr == old_ptr) + *ptr = new_ptr; + }); + } + /// Convert to a string. /// Format settings. @@ -295,6 +305,10 @@ public: protected: bool childrenHaveSecretParts() const; + /// Some AST classes have naked pointers to children elements as members. + /// This method allows to iterate over them. 
+ virtual void forEachPointerToChild(std::function) {} + private: size_t checkDepthImpl(size_t max_depth) const; diff --git a/src/Parsers/MySQL/ASTAlterCommand.h b/src/Parsers/MySQL/ASTAlterCommand.h index f097ed71219..87b665ec6a5 100644 --- a/src/Parsers/MySQL/ASTAlterCommand.h +++ b/src/Parsers/MySQL/ASTAlterCommand.h @@ -80,6 +80,15 @@ protected: { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTAlterCommand."); } + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(&index_decl)); + f(reinterpret_cast(&default_expression)); + f(reinterpret_cast(&additional_columns)); + f(reinterpret_cast(&order_by_columns)); + f(reinterpret_cast(&properties)); + } }; class ParserAlterCommand : public IParserBase diff --git a/src/Parsers/MySQL/ASTCreateDefines.h b/src/Parsers/MySQL/ASTCreateDefines.h index 3d2a79568ab..7c23d1cb87f 100644 --- a/src/Parsers/MySQL/ASTCreateDefines.h +++ b/src/Parsers/MySQL/ASTCreateDefines.h @@ -31,6 +31,13 @@ protected: { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTCreateDefines."); } + + void forEachPointerToChild(std::function f) override + { + f(reinterpret_cast(&columns)); + f(reinterpret_cast(&indices)); + f(reinterpret_cast(&constraints)); + } }; class ParserCreateDefines : public IParserBase @@ -44,4 +51,3 @@ protected: } } - diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 2ce470d9ecf..37a4614bad3 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -214,9 +214,14 @@ public: { /// Constness of limit is validated during query analysis stage limit_length = query_node.getLimit()->as().getValue().safeGet(); - } - if (query_node.hasOffset()) + if (query_node.hasOffset() && limit_length) + { + /// Constness of offset is validated during query analysis stage + limit_offset = query_node.getOffset()->as().getValue().safeGet(); + } + } + else if (query_node.hasOffset()) { /// Constness of offset is validated during query analysis stage limit_offset = query_node.getOffset()->as().getValue().safeGet(); diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp index 9f4a489bf5f..59ae0f20fac 100644 --- a/src/Planner/PlannerContext.cpp +++ b/src/Planner/PlannerContext.cpp @@ -45,7 +45,7 @@ bool GlobalPlannerContext::hasColumnIdentifier(const ColumnIdentifier & column_i return column_identifiers.contains(column_identifier); } -PlannerContext::PlannerContext(ContextPtr query_context_, GlobalPlannerContextPtr global_planner_context_) +PlannerContext::PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_) : query_context(std::move(query_context_)) , global_planner_context(std::move(global_planner_context_)) {} diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index 63874bf7ab9..e47198bfe5f 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -88,16 +88,22 @@ class PlannerContext { public: /// Create planner context with query context and global planner context - PlannerContext(ContextPtr query_context_, GlobalPlannerContextPtr global_planner_context_); + PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_); /// Get planner context query context - const ContextPtr & getQueryContext() const + ContextPtr getQueryContext() const { return query_context; } - /// Get planner context query context - ContextPtr & getQueryContext() + /// Get planner context mutable 
query context + const ContextMutablePtr & getMutableQueryContext() const + { + return query_context; + } + + /// Get planner context mutable query context + ContextMutablePtr & getMutableQueryContext() { return query_context; } @@ -137,12 +143,18 @@ public: */ TableExpressionData * getTableExpressionDataOrNull(const QueryTreeNodePtr & table_expression_node); - /// Get table expression node to data read only map + /// Get table expression node to data map const std::unordered_map & getTableExpressionNodeToData() const { return table_expression_node_to_data; } + /// Get table expression node to data map + std::unordered_map & getTableExpressionNodeToData() + { + return table_expression_node_to_data; + } + /** Get column node identifier. * For column node source check if table expression data is registered. * If table expression data is not registered exception is thrown. @@ -184,7 +196,7 @@ public: private: /// Query context - ContextPtr query_context; + ContextMutablePtr query_context; /// Global planner context GlobalPlannerContextPtr global_planner_context; diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 9a7340f936c..11444503c5f 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -34,15 +34,13 @@ namespace * It is client responsibility to update filter analysis result if filter column must be removed after chain is finalized. */ FilterAnalysisResult analyzeFilter(const QueryTreeNodePtr & filter_expression_node, - const ColumnsWithTypeAndName & current_output_columns, + const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { - const auto & filter_input = current_output_columns; - FilterAnalysisResult result; - result.filter_actions = buildActionsDAGFromExpressionNode(filter_expression_node, filter_input, planner_context); + result.filter_actions = buildActionsDAGFromExpressionNode(filter_expression_node, input_columns, planner_context); result.filter_column_name = result.filter_actions->getOutputs().at(0)->result_name; actions_chain.addStep(std::make_unique(result.filter_actions)); @@ -52,8 +50,8 @@ FilterAnalysisResult analyzeFilter(const QueryTreeNodePtr & filter_expression_no /** Construct aggregation analysis result if query tree has GROUP BY or aggregates. * Actions before aggregation are added into actions chain, if result is not null optional. 
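  *
  * Illustrative example (not from this patch): for
  *     SELECT a + 1 AS k, sum(b) FROM t GROUP BY a + 1
  * the "before aggregation" ActionsDAG computes the grouping key expression `a + 1`
  * and the aggregate argument `b` from the input columns, while the aggregation itself
  * (sum grouped by the key) is described by the returned aggregation analysis result.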
*/ -std::pair, std::optional> analyzeAggregation(const QueryTreeNodePtr & query_tree, - const ColumnsWithTypeAndName & current_output_columns, +std::optional analyzeAggregation(const QueryTreeNodePtr & query_tree, + const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { @@ -69,9 +67,7 @@ std::pair, std::optional(group_by_input); + ActionsDAGPtr before_aggregation_actions = std::make_shared(input_columns); before_aggregation_actions->getOutputs().clear(); std::unordered_set before_aggregation_actions_output_node_names; @@ -203,14 +199,14 @@ std::pair, std::optional analyzeWindow(const QueryTreeNodePtr & query_tree, - const ColumnsWithTypeAndName & current_output_columns, + const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { @@ -220,11 +216,9 @@ std::optional analyzeWindow(const QueryTreeNodePtr & query auto window_descriptions = extractWindowDescriptions(window_function_nodes, *planner_context); - const auto & window_input = current_output_columns; - PlannerActionsVisitor actions_visitor(planner_context); - ActionsDAGPtr before_window_actions = std::make_shared(window_input); + ActionsDAGPtr before_window_actions = std::make_shared(input_columns); before_window_actions->getOutputs().clear(); std::unordered_set before_window_actions_output_node_names; @@ -299,12 +293,11 @@ std::optional analyzeWindow(const QueryTreeNodePtr & query * It is client responsibility to update projection analysis result with project names actions after chain is finalized. */ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, - const ColumnsWithTypeAndName & current_output_columns, + const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { - const auto & projection_input = current_output_columns; - auto projection_actions = buildActionsDAGFromExpressionNode(query_node.getProjectionNode(), projection_input, planner_context); + auto projection_actions = buildActionsDAGFromExpressionNode(query_node.getProjectionNode(), input_columns, planner_context); auto projection_columns = query_node.getProjectionColumns(); size_t projection_columns_size = projection_columns.size(); @@ -347,13 +340,11 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, * Actions before sort are added into actions chain. */ SortAnalysisResult analyzeSort(const QueryNode & query_node, - const ColumnsWithTypeAndName & current_output_columns, + const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { - const auto & order_by_input = current_output_columns; - - ActionsDAGPtr before_sort_actions = std::make_shared(order_by_input); + ActionsDAGPtr before_sort_actions = std::make_shared(input_columns); auto & before_sort_actions_outputs = before_sort_actions->getOutputs(); before_sort_actions_outputs.clear(); @@ -436,13 +427,12 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, * Actions before limit by are added into actions chain. 
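  *
  * Illustrative example (not from this patch): for
  *     SELECT id, value FROM t ORDER BY value LIMIT 2 BY id
  * the "before limit by" ActionsDAG must output the LIMIT BY expression `id`
  * (plus any columns listed in required_output_nodes_names), so that the later
  * LimitBy step can keep at most 2 rows per distinct `id`.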
*/ LimitByAnalysisResult analyzeLimitBy(const QueryNode & query_node, - const ColumnsWithTypeAndName & current_output_columns, + const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context, const NameSet & required_output_nodes_names, ActionsChain & actions_chain) { - const auto & limit_by_input = current_output_columns; - auto before_limit_by_actions = buildActionsDAGFromExpressionNode(query_node.getLimitByNode(), limit_by_input, planner_context); + auto before_limit_by_actions = buildActionsDAGFromExpressionNode(query_node.getLimitByNode(), input_columns, planner_context); NameSet limit_by_column_names_set; Names limit_by_column_names; @@ -480,8 +470,7 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo std::optional where_analysis_result_optional; std::optional where_action_step_index_optional; - const auto * input_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); - ColumnsWithTypeAndName current_output_columns = input_columns ? *input_columns : join_tree_input_columns; + ColumnsWithTypeAndName current_output_columns = join_tree_input_columns; if (query_node.hasWhere()) { @@ -490,9 +479,9 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); } - auto [aggregation_analysis_result_optional, aggregated_columns_optional] = analyzeAggregation(query_tree, current_output_columns, planner_context, actions_chain); - if (aggregated_columns_optional) - current_output_columns = std::move(*aggregated_columns_optional); + auto aggregation_analysis_result_optional = analyzeAggregation(query_tree, current_output_columns, planner_context, actions_chain); + if (aggregation_analysis_result_optional) + current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); std::optional having_analysis_result_optional; std::optional having_action_step_index_optional; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 6f818e2c8f7..a48cceebfb6 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -246,17 +246,87 @@ bool applyTrivialCountIfPossible( return true; } -JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & table_expression, - const SelectQueryInfo & select_query_info, - const SelectQueryOptions & select_query_options, - PlannerContextPtr & planner_context, - bool is_single_table_expression) +void prepareBuildQueryPlanForTableExpression(const QueryTreeNodePtr & table_expression, PlannerContextPtr & planner_context) { const auto & query_context = planner_context->getQueryContext(); const auto & settings = query_context->getSettingsRef(); + auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); + auto columns_names = table_expression_data.getColumnNames(); + + auto * table_node = table_expression->as(); + auto * table_function_node = table_expression->as(); + auto * query_node = table_expression->as(); + auto * union_node = table_expression->as(); + + /** The current user must have the SELECT privilege. + * We do not check access rights for table functions because they have been already checked in ITableFunction::execute(). 
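  *
  * Illustrative example (not from this patch): for `SELECT a, alias_b FROM t`,
  * where `alias_b` is an ALIAS column, the check below effectively requires the
  * current user to have SELECT(a, alias_b) ON t.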
+ */ + if (table_node) + { + auto column_names_with_aliases = columns_names; + const auto & alias_columns_names = table_expression_data.getAliasColumnsNames(); + column_names_with_aliases.insert(column_names_with_aliases.end(), alias_columns_names.begin(), alias_columns_names.end()); + checkAccessRights(*table_node, column_names_with_aliases, query_context); + } + + if (columns_names.empty()) + { + NameAndTypePair additional_column_to_read; + + if (table_node || table_function_node) + { + const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage(); + const auto & storage_snapshot = table_node ? table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot(); + additional_column_to_read = chooseSmallestColumnToReadFromStorage(storage, storage_snapshot); + + } + else if (query_node || union_node) + { + const auto & projection_columns = query_node ? query_node->getProjectionColumns() : union_node->computeProjectionColumns(); + NamesAndTypesList projection_columns_list(projection_columns.begin(), projection_columns.end()); + additional_column_to_read = ExpressionActions::getSmallestColumn(projection_columns_list); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected table, table function, query or union. Actual {}", + table_expression->formatASTForErrorMessage()); + } + + auto & global_planner_context = planner_context->getGlobalPlannerContext(); + const auto & column_identifier = global_planner_context->createColumnIdentifier(additional_column_to_read, table_expression); + columns_names.push_back(additional_column_to_read.name); + table_expression_data.addColumn(additional_column_to_read, column_identifier); + } + + /// Limitation on the number of columns to read + if (settings.max_columns_to_read && columns_names.size() > settings.max_columns_to_read) + throw Exception(ErrorCodes::TOO_MANY_COLUMNS, + "Limit for number of columns to read exceeded. Requested: {}, maximum: {}", + columns_names.size(), + settings.max_columns_to_read); +} + +JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, + const SelectQueryInfo & select_query_info, + const SelectQueryOptions & select_query_options, + PlannerContextPtr & planner_context, + bool is_single_table_expression, + bool wrap_read_columns_in_subquery) +{ + const auto & query_context = planner_context->getQueryContext(); + const auto & settings = query_context->getSettingsRef(); + + auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); + QueryProcessingStage::Enum from_stage = QueryProcessingStage::Enum::FetchColumns; + if (wrap_read_columns_in_subquery) + { + auto columns = table_expression_data.getColumns(); + table_expression = buildSubqueryToReadColumnsFromTableExpression(columns, table_expression, query_context); + } + auto * table_node = table_expression->as(); auto * table_function_node = table_expression->as(); auto * query_node = table_expression->as(); @@ -264,8 +334,6 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl QueryPlan query_plan; - auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); - if (table_node || table_function_node) { const auto & storage = table_node ? 
table_node->getStorage() : table_function_node->getStorage(); @@ -362,32 +430,6 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl auto columns_names = table_expression_data.getColumnNames(); - /** The current user must have the SELECT privilege. - * We do not check access rights for table functions because they have been already checked in ITableFunction::execute(). - */ - if (table_node) - { - auto column_names_with_aliases = columns_names; - const auto & alias_columns_names = table_expression_data.getAliasColumnsNames(); - column_names_with_aliases.insert(column_names_with_aliases.end(), alias_columns_names.begin(), alias_columns_names.end()); - checkAccessRights(*table_node, column_names_with_aliases, planner_context->getQueryContext()); - } - - /// Limitation on the number of columns to read - if (settings.max_columns_to_read && columns_names.size() > settings.max_columns_to_read) - throw Exception(ErrorCodes::TOO_MANY_COLUMNS, - "Limit for number of columns to read exceeded. Requested: {}, maximum: {}", - columns_names.size(), - settings.max_columns_to_read); - - if (columns_names.empty()) - { - auto additional_column_to_read = chooseSmallestColumnToReadFromStorage(storage, storage_snapshot); - const auto & column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(additional_column_to_read, table_expression); - columns_names.push_back(additional_column_to_read.name); - table_expression_data.addColumn(additional_column_to_read, column_identifier); - } - bool need_rewrite_query_with_final = storage->needRewriteQueryWithFinal(columns_names); if (need_rewrite_query_with_final) { @@ -423,6 +465,17 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl { from_stage = storage->getQueryProcessingStage(query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); storage->read(query_plan, columns_names, storage_snapshot, table_expression_query_info, query_context, from_stage, max_block_size, max_streams); + + if (query_context->hasQueryContext() && !select_query_options.is_internal) + { + auto local_storage_id = storage->getStorageID(); + query_context->getQueryContext()->addQueryAccessInfo( + backQuoteIfNeed(local_storage_id.getDatabaseName()), + local_storage_id.getFullTableName(), + columns_names, + {}, + {}); + } } if (query_plan.isInitialized()) @@ -464,16 +517,6 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl } else { - if (table_expression_data.getColumnNames().empty()) - { - const auto & projection_columns = query_node ? 
query_node->getProjectionColumns() : union_node->computeProjectionColumns(); - NamesAndTypesList projection_columns_list(projection_columns.begin(), projection_columns.end()); - auto additional_column_to_read = ExpressionActions::getSmallestColumn(projection_columns_list); - - const auto & column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(additional_column_to_read, table_expression); - table_expression_data.addColumn(additional_column_to_read, column_identifier); - } - auto subquery_options = select_query_options.subquery(); Planner subquery_planner(table_expression, subquery_options, planner_context->getGlobalPlannerContext()); /// Propagate storage limits to subquery @@ -516,10 +559,11 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl planner.buildQueryPlanIfNeeded(); auto expected_header = planner.getQueryPlan().getCurrentDataStream().header; - materializeBlockInplace(expected_header); if (!blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, expected_header)) { + materializeBlockInplace(expected_header); + auto rename_actions_dag = ActionsDAG::makeConvertingActions( query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), expected_header.getColumnsWithTypeAndName(), @@ -1059,14 +1103,40 @@ JoinTreeQueryPlan buildJoinTreeQueryPlan(const QueryTreeNodePtr & query_node, const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context) { - const auto & query_node_typed = query_node->as(); - auto table_expressions_stack = buildTableExpressionsStack(query_node_typed.getJoinTree()); + auto table_expressions_stack = buildTableExpressionsStack(query_node->as().getJoinTree()); size_t table_expressions_stack_size = table_expressions_stack.size(); bool is_single_table_expression = table_expressions_stack_size == 1; std::vector table_expressions_outer_scope_columns(table_expressions_stack_size); ColumnIdentifierSet current_outer_scope_columns = outer_scope_columns; + /// For each table, table function, query, union table expressions prepare before query plan build + for (size_t i = 0; i < table_expressions_stack_size; ++i) + { + const auto & table_expression = table_expressions_stack[i]; + auto table_expression_type = table_expression->getNodeType(); + if (table_expression_type == QueryTreeNodeType::JOIN || + table_expression_type == QueryTreeNodeType::ARRAY_JOIN) + continue; + + prepareBuildQueryPlanForTableExpression(table_expression, planner_context); + } + + /** If left most table expression query plan is planned to stage that is not equal to fetch columns, + * then left most table expression is responsible for providing valid JOIN TREE part of final query plan. + * + * Examples: Distributed, LiveView, Merge storages. 
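   *
   * Illustrative example (not from this patch): for
   *     SELECT ... FROM distributed_table AS d JOIN local_table AS l ON d.key = l.key
   * the Distributed storage may report a processing stage beyond FetchColumns because it
   * plans the whole join on the remote shards itself; in that case the plan built for the
   * left most table expression below is returned as-is instead of building joins here.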
+ */ + auto left_table_expression = table_expressions_stack.front(); + auto left_table_expression_query_plan = buildQueryPlanForTableExpression(left_table_expression, + select_query_info, + select_query_options, + planner_context, + is_single_table_expression, + false /*wrap_read_columns_in_subquery*/); + if (left_table_expression_query_plan.from_stage != QueryProcessingStage::FetchColumns) + return left_table_expression_query_plan; + for (Int64 i = static_cast(table_expressions_stack_size) - 1; i >= 0; --i) { table_expressions_outer_scope_columns[i] = current_outer_scope_columns; @@ -1120,19 +1190,23 @@ JoinTreeQueryPlan buildJoinTreeQueryPlan(const QueryTreeNodePtr & query_node, } else { - const auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); - if (table_expression_data.isRemote() && i != 0) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "JOIN with multiple remote storages is unsupported"); + if (table_expression == left_table_expression) + { + query_plans_stack.push_back(std::move(left_table_expression_query_plan)); /// NOLINT + left_table_expression = {}; + continue; + } + /** If table expression is remote and it is not left most table expression, we wrap read columns from such + * table expression in subquery. + */ + bool is_remote = planner_context->getTableExpressionDataOrThrow(table_expression).isRemote(); query_plans_stack.push_back(buildQueryPlanForTableExpression(table_expression, select_query_info, select_query_options, planner_context, - is_single_table_expression)); - - if (query_plans_stack.back().from_stage != QueryProcessingStage::FetchColumns) - break; + is_single_table_expression, + is_remote /*wrap_read_columns_in_subquery*/)); } } diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index e1c137ddfb8..63fe3cc7b55 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -61,6 +62,8 @@ void JoinClause::dump(WriteBuffer & buffer) const for (const auto & dag_node : dag_nodes) { dag_nodes_dump += dag_node->result_name; + dag_nodes_dump += " "; + dag_nodes_dump += dag_node->result_type->getName(); dag_nodes_dump += ", "; } @@ -655,7 +658,7 @@ std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_jo return std::make_shared(table_join, right_table_expression_header); } - if (!table_join->oneDisjunct() && !table_join->isEnabledAlgorithm(JoinAlgorithm::HASH)) + if (!table_join->oneDisjunct() && !table_join->isEnabledAlgorithm(JoinAlgorithm::HASH) && !table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section"); /// Direct JOIN with special storages that support key value access. 
For example JOIN with Dictionary @@ -708,7 +711,11 @@ std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_jo } if (table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO)) - return std::make_shared(table_join, right_table_expression_header); + { + if (MergeJoin::isSupported(table_join)) + return std::make_shared(table_join, right_table_expression_header); + return std::make_shared(table_join, right_table_expression_header); + } throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't execute any of specified algorithms for specified strictness/kind and right storage type"); diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index e828f128e38..0f74e671ac7 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -101,6 +101,17 @@ public: return column_names; } + NamesAndTypes getColumns() const + { + NamesAndTypes result; + result.reserve(column_names.size()); + + for (const auto & column_name : column_names) + result.push_back(column_name_to_column.at(column_name)); + + return result; + } + ColumnIdentifiers getColumnIdentifiers() const { ColumnIdentifiers result; diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 5c5eadac55d..2018ddafcdd 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -19,6 +20,7 @@ #include #include +#include #include #include #include @@ -341,27 +343,6 @@ QueryTreeNodePtr mergeConditionNodes(const QueryTreeNodes & condition_nodes, con return function_node; } -std::optional tryExtractConstantFromConditionNode(const QueryTreeNodePtr & condition_node) -{ - const auto * constant_node = condition_node->as(); - if (!constant_node) - return {}; - - const auto & value = constant_node->getValue(); - auto constant_type = constant_node->getResultType(); - constant_type = removeNullable(removeLowCardinality(constant_type)); - - auto which_constant_type = WhichDataType(constant_type); - if (!which_constant_type.isUInt8() && !which_constant_type.isNothing()) - return {}; - - if (value.isNull()) - return false; - - UInt8 predicate_value = value.safeGet(); - return predicate_value > 0; -} - QueryTreeNodePtr replaceTablesAndTableFunctionsWithDummyTables(const QueryTreeNodePtr & query_node, const ContextPtr & context, ResultReplacementMap * result_replacement_map) @@ -391,4 +372,36 @@ QueryTreeNodePtr replaceTablesAndTableFunctionsWithDummyTables(const QueryTreeNo return query_node->cloneAndReplace(replacement_map); } +QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(const NamesAndTypes & columns, + const QueryTreeNodePtr & table_expression, + const ContextPtr & context) +{ + auto projection_columns = columns; + + QueryTreeNodes subquery_projection_nodes; + subquery_projection_nodes.reserve(projection_columns.size()); + + for (const auto & column : projection_columns) + subquery_projection_nodes.push_back(std::make_shared(column, table_expression)); + + if (subquery_projection_nodes.empty()) + { + auto constant_data_type = std::make_shared(); + subquery_projection_nodes.push_back(std::make_shared(1UL, constant_data_type)); + projection_columns.push_back({"1", std::move(constant_data_type)}); + } + + auto context_copy = Context::createCopy(context); + updateContextForSubqueryExecution(context_copy); + + auto query_node = std::make_shared(std::move(context_copy)); + + query_node->resolveProjectionColumns(projection_columns); + query_node->getProjection().getNodes() = std::move(subquery_projection_nodes); + 
query_node->getJoinTree() = table_expression; + query_node->setIsSubquery(true); + + return query_node; +} + } diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h index 0520bd67d26..0effb1d08ae 100644 --- a/src/Planner/Utils.h +++ b/src/Planner/Utils.h @@ -63,13 +63,15 @@ bool queryHasWithTotalsInAnySubqueryInJoinTree(const QueryTreeNodePtr & query_no /// Returns `and` function node that has condition nodes as its arguments QueryTreeNodePtr mergeConditionNodes(const QueryTreeNodes & condition_nodes, const ContextPtr & context); -/// Try extract boolean constant from condition node -std::optional tryExtractConstantFromConditionNode(const QueryTreeNodePtr & condition_node); - /// Replace tables nodes and table function nodes with dummy table nodes using ResultReplacementMap = std::unordered_map; QueryTreeNodePtr replaceTablesAndTableFunctionsWithDummyTables(const QueryTreeNodePtr & query_node, const ContextPtr & context, ResultReplacementMap * result_replacement_map = nullptr); +/// Build subquery to read specified columns from table expression +QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(const NamesAndTypes & columns, + const QueryTreeNodePtr & table_expression, + const ContextPtr & context); + } diff --git a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp index bf0e2448082..c85c0342c8c 100644 --- a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp @@ -17,6 +17,24 @@ namespace ErrorCodes extern const int UNKNOWN_EXCEPTION; } +namespace +{ + +arrow::Compression::type getArrowCompression(FormatSettings::ArrowCompression method) +{ + switch (method) + { + case FormatSettings::ArrowCompression::NONE: + return arrow::Compression::type::UNCOMPRESSED; + case FormatSettings::ArrowCompression::ZSTD: + return arrow::Compression::type::ZSTD; + case FormatSettings::ArrowCompression::LZ4_FRAME: + return arrow::Compression::type::LZ4_FRAME; + } +} + +} + ArrowBlockOutputFormat::ArrowBlockOutputFormat(WriteBuffer & out_, const Block & header_, bool stream_, const FormatSettings & format_settings_) : IOutputFormat(header_, out_) , stream{stream_} @@ -78,12 +96,14 @@ void ArrowBlockOutputFormat::prepareWriter(const std::shared_ptr { arrow_ostream = std::make_shared(out); arrow::Result> writer_status; + arrow::ipc::IpcWriteOptions options = arrow::ipc::IpcWriteOptions::Defaults(); + options.codec = *arrow::util::Codec::Create(getArrowCompression(format_settings.arrow.output_compression_method)); // TODO: should we use arrow::ipc::IpcOptions::alignment? if (stream) - writer_status = arrow::ipc::MakeStreamWriter(arrow_ostream.get(), schema); + writer_status = arrow::ipc::MakeStreamWriter(arrow_ostream.get(), schema, options); else - writer_status = arrow::ipc::MakeFileWriter(arrow_ostream.get(), schema); + writer_status = arrow::ipc::MakeFileWriter(arrow_ostream.get(), schema,options); if (!writer_status.ok()) throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index bd1b13ce2ef..d3fd9ef73e1 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -23,6 +23,7 @@ public: 0, settings.skip_unknown_fields, settings.null_as_default, + settings.native.allow_types_conversion, settings.defaults_for_omitted_fields ? 
&block_missing_values : nullptr)) , header(header_) {} diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 86d9560beb9..67345a0dfb0 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -28,6 +28,34 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_COLUMN; + extern const int NOT_IMPLEMENTED; +} + +namespace +{ + +orc::CompressionKind getORCCompression(FormatSettings::ORCCompression method) +{ + if (method == FormatSettings::ORCCompression::NONE) + return orc::CompressionKind::CompressionKind_NONE; + +#if USE_SNAPPY + if (method == FormatSettings::ORCCompression::SNAPPY) + return orc::CompressionKind::CompressionKind_SNAPPY; +#endif + + if (method == FormatSettings::ORCCompression::ZSTD) + return orc::CompressionKind::CompressionKind_ZSTD; + + if (method == FormatSettings::ORCCompression::LZ4) + return orc::CompressionKind::CompressionKind_LZ4; + + if (method == FormatSettings::ORCCompression::ZLIB) + return orc::CompressionKind::CompressionKind_ZLIB; + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); +} + } ORCOutputStream::ORCOutputStream(WriteBuffer & out_) : out(out_) {} @@ -544,7 +572,7 @@ void ORCBlockOutputFormat::prepareWriter() { const Block & header = getPort(PortKind::Main).getHeader(); schema = orc::createStructType(); - options.setCompression(orc::CompressionKind::CompressionKind_NONE); + options.setCompression(getORCCompression(format_settings.orc.output_compression_method)); size_t columns_count = header.columns(); for (size_t i = 0; i != columns_count; ++i) schema->addStructField(header.safeGetByPosition(i).name, getORCType(recursiveRemoveLowCardinality(data_types[i]))); diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index fca097d8ea7..87a0fbf77a8 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -45,44 +45,38 @@ Chunk ParquetBlockInputFormat::generate() block_missing_values.clear(); if (!file_reader) - { prepareReader(); - file_reader->set_batch_size(format_settings.parquet.max_block_size); - std::vector row_group_indices; - for (int i = 0; i < row_group_total; ++i) - { - if (!skip_row_groups.contains(i)) - row_group_indices.emplace_back(i); - } - auto read_status = file_reader->GetRecordBatchReader(row_group_indices, column_indices, ¤t_record_batch_reader); - if (!read_status.ok()) - throw DB::ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", read_status.ToString()); - } if (is_stopped) return {}; - auto batch = current_record_batch_reader->Next(); - if (!batch.ok()) - { - throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", - batch.status().ToString()); - } - if (*batch) - { - auto tmp_table = arrow::Table::FromRecordBatches({*batch}); - /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. - /// Otherwise fill the missing columns with zero values of its type. - BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? 
&block_missing_values : nullptr; - arrow_column_to_ch_column->arrowTableToCHChunk(res, *tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); - } - else - { - current_record_batch_reader.reset(); - file_reader.reset(); - return {}; - } + while (row_group_current < row_group_total && skip_row_groups.contains(row_group_current)) + ++row_group_current; + if (row_group_current >= row_group_total) + return res; + + std::shared_ptr table; + + std::unique_ptr<::arrow::RecordBatchReader> rbr; + std::vector row_group_indices { row_group_current }; + arrow::Status get_batch_reader_status = file_reader->GetRecordBatchReader(row_group_indices, column_indices, &rbr); + + if (!get_batch_reader_status.ok()) + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", + get_batch_reader_status.ToString()); + + arrow::Status read_status = rbr->ReadAll(&table); + + if (!read_status.ok()) + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", read_status.ToString()); + + ++row_group_current; + + /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. + /// Otherwise fill the missing columns with zero values of its type. + BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &block_missing_values : nullptr; + arrow_column_to_ch_column->arrowTableToCHChunk(res, table, table->num_rows(), block_missing_values_ptr); return res; } diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 18c81f8fd6a..759f773a574 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -14,9 +14,13 @@ namespace DB namespace ErrorCodes { extern const int UNKNOWN_EXCEPTION; + extern const int NOT_IMPLEMENTED; } -static parquet::ParquetVersion::type getParquetVersion(const FormatSettings & settings) +namespace +{ + +parquet::ParquetVersion::type getParquetVersion(const FormatSettings & settings) { switch (settings.parquet.output_version) { @@ -31,6 +35,35 @@ static parquet::ParquetVersion::type getParquetVersion(const FormatSettings & se } } +parquet::Compression::type getParquetCompression(FormatSettings::ParquetCompression method) +{ + if (method == FormatSettings::ParquetCompression::NONE) + return parquet::Compression::type::UNCOMPRESSED; + +#if USE_SNAPPY + if (method == FormatSettings::ParquetCompression::SNAPPY) + return parquet::Compression::type::SNAPPY; +#endif + +#if USE_BROTLI + if (method == FormatSettings::ParquetCompression::BROTLI) + return parquet::Compression::type::BROTLI; +#endif + + if (method == FormatSettings::ParquetCompression::ZSTD) + return parquet::Compression::type::ZSTD; + + if (method == FormatSettings::ParquetCompression::LZ4) + return parquet::Compression::type::LZ4; + + if (method == FormatSettings::ParquetCompression::GZIP) + return parquet::Compression::type::GZIP; + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); +} + +} + ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) : IOutputFormat(header_, out_), format_settings{format_settings_} { @@ -60,9 +93,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) parquet::WriterProperties::Builder builder; builder.version(getParquetVersion(format_settings)); -#if USE_SNAPPY - 
builder.compression(parquet::Compression::SNAPPY); -#endif + builder.compression(getParquetCompression(format_settings.parquet.output_compression_method)); auto props = builder.build(); auto status = parquet::arrow::FileWriter::Open( *arrow_table->schema(), diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index 560be60987b..ef103eb508c 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -117,7 +117,7 @@ static void postprocessChunk(Chunk & chunk, const AggregatingSortedAlgorithm::Co { const auto & from_type = desc.nested_type; const auto & to_type = desc.real_type; - columns[desc.column_number] = recursiveTypeConversion(columns[desc.column_number], from_type, to_type); + columns[desc.column_number] = recursiveLowCardinalityTypeConversion(columns[desc.column_number], from_type, to_type); } } diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 0f1775d4ac0..d8e95e6b950 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -450,7 +450,7 @@ static void postprocessChunk( { const auto & from_type = desc.nested_type; const auto & to_type = desc.real_type; - res_columns[desc.column_numbers[0]] = recursiveTypeConversion(column, from_type, to_type); + res_columns[desc.column_numbers[0]] = recursiveLowCardinalityTypeConversion(column, from_type, to_type); } else res_columns[desc.column_numbers[0]] = std::move(column); diff --git a/src/Processors/QueryPlan/ISourceStep.cpp b/src/Processors/QueryPlan/ISourceStep.cpp index 0644d9b44eb..37f56bc7a43 100644 --- a/src/Processors/QueryPlan/ISourceStep.cpp +++ b/src/Processors/QueryPlan/ISourceStep.cpp @@ -12,10 +12,19 @@ ISourceStep::ISourceStep(DataStream output_stream_) QueryPipelineBuilderPtr ISourceStep::updatePipeline(QueryPipelineBuilders, const BuildQueryPipelineSettings & settings) { auto pipeline = std::make_unique(); - QueryPipelineProcessorsCollector collector(*pipeline, this); + + /// For `Source` step, since it's not add new Processors to `pipeline->pipe` + /// in `initializePipeline`, but make an assign with new created Pipe. + /// And Processors for the Step is added here. So we do not need to use + /// `QueryPipelineProcessorsCollector` to collect Processors. 
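    /// A rough sketch (illustrative assumption, not part of this patch) of how a source
    /// step typically assigns a fresh Pipe and remembers its processors itself;
    /// `ReadFromExampleSource` is a hypothetical subclass used only for illustration:
    ///
    ///     void ReadFromExampleSource::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
    ///     {
    ///         Pipe pipe(std::make_shared<SourceFromSingleChunk>(output_stream->header));
    ///         for (const auto & processor : pipe.getProcessors())
    ///             processors.emplace_back(processor);   /// remembered by the step itself
    ///         pipeline.init(std::move(pipe));           /// replaces the pipe wholesale
    ///     }
    ///
    /// This is why the collector would see nothing here, and the step instead tags its
    /// own processors right after initializePipeline() below.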
initializePipeline(*pipeline, settings); - auto added_processors = collector.detachProcessors(); - processors.insert(processors.end(), added_processors.begin(), added_processors.end()); + + /// But we need to set QueryPlanStep manually for the Processors, which + /// will be used in `EXPLAIN PIPELINE` + for (auto & processor : processors) + { + processor->setQueryPlanStep(this); + } return pipeline; } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index 0874a3771ae..9407504579b 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -519,8 +519,9 @@ AggregationInputOrder buildInputOrderInfo( enreachFixedColumns(sorting_key_dag, fixed_key_columns); - for (auto it = matches.cbegin(); it != matches.cend(); ++it) + for (const auto * output : dag->getOutputs()) { + auto it = matches.find(output); const MatchedTrees::Match * match = &it->second; if (match->node) { diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 870106d794f..c27e73804ad 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -710,6 +711,7 @@ IProcessor::Status FinalizingViewsTransform::prepare() if (!output.canPush()) return Status::PortFull; + bool materialized_views_ignore_errors = views_data->context->getSettingsRef().materialized_views_ignore_errors; size_t num_finished = 0; size_t pos = 0; for (auto & input : inputs) @@ -735,7 +737,7 @@ IProcessor::Status FinalizingViewsTransform::prepare() else statuses[i].exception = data.exception; - if (i == 0 && statuses[0].is_first) + if (i == 0 && statuses[0].is_first && !materialized_views_ignore_errors) { output.pushData(std::move(data)); return Status::PortFull; @@ -752,7 +754,7 @@ IProcessor::Status FinalizingViewsTransform::prepare() if (!statuses.empty()) return Status::Ready; - if (any_exception) + if (any_exception && !materialized_views_ignore_errors) output.pushException(any_exception); output.finish(); @@ -782,6 +784,8 @@ static std::exception_ptr addStorageToException(std::exception_ptr ptr, const St void FinalizingViewsTransform::work() { + bool materialized_views_ignore_errors = views_data->context->getSettingsRef().materialized_views_ignore_errors; + size_t i = 0; for (auto & view : views_data->views) { @@ -794,6 +798,10 @@ void FinalizingViewsTransform::work() any_exception = status.exception; view.setException(addStorageToException(status.exception, view.table_id)); + + /// Exception will be ignored, it is saved here for the system.query_views_log + if (materialized_views_ignore_errors) + tryLogException(view.exception, &Poco::Logger::get("PushingToViews"), "Cannot push to the storage, ignoring the error"); } else { diff --git a/src/QueryPipeline/printPipeline.h b/src/QueryPipeline/printPipeline.h index 76143211875..e91909cb50b 100644 --- a/src/QueryPipeline/printPipeline.h +++ b/src/QueryPipeline/printPipeline.h @@ -10,7 +10,6 @@ namespace DB * You can render it with: * dot -T png < pipeline.dot > pipeline.png */ - template void printPipeline(const Processors & processors, const Statuses & statuses, WriteBuffer & out) { @@ -70,5 +69,4 @@ void printPipeline(const Processors & processors, WriteBuffer & out) /// If QueryPlanStep wasn't set for 
processor, representation may be not correct. /// If with_header is set, prints block header for each edge. void printPipelineCompact(const Processors & processors, WriteBuffer & out, bool with_header); - } diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index eccabc44fbe..a6924310c3c 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -843,6 +843,7 @@ namespace query_context->getClientInfo().client_trace_context, query_context->getSettingsRef(), query_context->getOpenTelemetrySpanLog()); + thread_trace_context->root_span.kind = OpenTelemetry::SERVER; /// Prepare for sending exceptions and logs. const Settings & settings = query_context->getSettingsRef(); diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index f468167f782..2c27879f7b7 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -1001,6 +1001,7 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse client_info.client_trace_context, context->getSettingsRef(), context->getOpenTelemetrySpanLog()); + thread_trace_context->root_span.kind = OpenTelemetry::SERVER; thread_trace_context->root_span.addAttribute("clickhouse.uri", request.getURI()); response.setContentType("text/plain; charset=UTF-8"); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 27b3712ffd3..cf948961298 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -277,6 +277,7 @@ void TCPHandler::runImpl() query_context->getClientInfo().client_trace_context, query_context->getSettingsRef(), query_context->getOpenTelemetrySpanLog()); + thread_trace_context->root_span.kind = OpenTelemetry::SERVER; query_scope.emplace(query_context, /* fatal_error_callback */ [this] { diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index 8dbaa0796e9..a88470d01c7 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -83,8 +84,12 @@ Pipe StorageHDFSCluster::read( auto extension = getTaskIteratorExtension(query_info.query, context); /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*) - Block header = - InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + Block header; + + if (context->getSettingsRef().allow_experimental_analyzer) + header = InterpreterSelectQueryAnalyzer::getSampleBlock(query_info.query, context, SelectQueryOptions(processed_stage).analyze()); + else + header = InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); const Scalars & scalars = context->hasQueryContext() ? 
context->getQueryContext()->getScalars() : Scalars{}; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index b961b70428e..9d9d8420e2c 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -96,6 +96,32 @@ static void extractMergingAndGatheringColumns( } } +static void addMissedColumnsToSerializationInfos( + size_t num_rows_in_parts, + const Names & part_columns, + const ColumnsDescription & storage_columns, + const SerializationInfo::Settings & info_settings, + SerializationInfoByName & new_infos) +{ + NameSet part_columns_set(part_columns.begin(), part_columns.end()); + + for (const auto & column : storage_columns) + { + if (part_columns_set.contains(column.name)) + continue; + + if (column.default_desc.kind != ColumnDefaultKind::Default) + continue; + + if (column.default_desc.expression) + continue; + + auto new_info = column.type->createSerializationInfo(info_settings); + new_info->addDefaults(num_rows_in_parts); + new_infos.emplace(column.name, std::move(new_info)); + } +} + bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() { @@ -205,7 +231,19 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() ctx->force_ttl = true; } - infos.add(part->getSerializationInfos()); + if (!info_settings.isAlwaysDefault()) + { + auto part_infos = part->getSerializationInfos(); + + addMissedColumnsToSerializationInfos( + part->rows_count, + part->getColumns().getNames(), + global_ctx->metadata_snapshot->getColumns(), + info_settings, + part_infos); + + infos.add(part_infos); + } } global_ctx->new_data_part->setColumns(global_ctx->storage_columns, infos); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 73007e3f178..d7cea944689 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4121,9 +4121,9 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception( ErrorCodes::TOO_MANY_PARTS, - "Too many parts ({}) in all partitions in total. This indicates wrong choice of partition key. The threshold can be modified " + "Too many parts ({}) in all partitions in total in table '{}'. This indicates wrong choice of partition key. The threshold can be modified " "with 'max_parts_in_total' setting in element in config.xml or with per-table setting.", - parts_count_in_total); + parts_count_in_total, getLogName()); } size_t outdated_parts_over_threshold = 0; @@ -4137,8 +4137,8 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception( ErrorCodes::TOO_MANY_PARTS, - "Too many inactive parts ({}). Parts cleaning are processing significantly slower than inserts", - outdated_parts_count_in_partition); + "Too many inactive parts ({}) in table '{}'. Parts cleaning are processing significantly slower than inserts", + outdated_parts_count_in_partition, getLogName()); } if (settings->inactive_parts_to_delay_insert > 0 && outdated_parts_count_in_partition >= settings->inactive_parts_to_delay_insert) outdated_parts_over_threshold = outdated_parts_count_in_partition - settings->inactive_parts_to_delay_insert + 1; @@ -4151,6 +4151,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex const auto active_parts_to_throw_insert = query_settings.parts_to_throw_insert ? 
query_settings.parts_to_throw_insert : settings->parts_to_throw_insert; size_t active_parts_over_threshold = 0; + { bool parts_are_large_enough_in_average = settings->max_avg_part_size_for_too_many_parts && average_part_size > settings->max_avg_part_size_for_too_many_parts; @@ -4160,9 +4161,10 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception( ErrorCodes::TOO_MANY_PARTS, - "Too many parts ({} with average size of {}). Merges are processing significantly slower than inserts", + "Too many parts ({} with average size of {}) in table '{}'. Merges are processing significantly slower than inserts", parts_count_in_partition, - ReadableSize(average_part_size)); + ReadableSize(average_part_size), + getLogName()); } if (active_parts_to_delay_insert > 0 && parts_count_in_partition >= active_parts_to_delay_insert && !parts_are_large_enough_in_average) diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp index e7d86f2a635..8e8409f3868 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp @@ -201,6 +201,7 @@ MergeTreeConditionInverted::MergeTreeConditionInverted( rpn.push_back(RPNElement::FUNCTION_UNKNOWN); return; } + rpn = std::move( RPNBuilder( query_info.filter_actions_dag->getOutputs().at(0), context_, @@ -208,10 +209,10 @@ MergeTreeConditionInverted::MergeTreeConditionInverted( { return this->traverseAtomAST(node, out); }).extractRPN()); + return; } ASTPtr filter_node = buildFilterNode(query_info.query); - if (!filter_node) { rpn.push_back(RPNElement::FUNCTION_UNKNOWN); @@ -226,7 +227,6 @@ MergeTreeConditionInverted::MergeTreeConditionInverted( query_info.prepared_sets, [&](const RPNBuilderTreeNode & node, RPNElement & out) { return traverseAtomAST(node, out); }); rpn = std::move(builder).extractRPN(); - } /// Keep in-sync with MergeTreeConditionFullText::alwaysUnknownOrTrue diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 94715822e52..9f7a12745c6 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -651,6 +651,12 @@ static NameToNameVector collectFilesForRenames( } } + if (!source_part->getSerializationInfos().empty() + && new_part->getSerializationInfos().empty()) + { + rename_vector.emplace_back(IMergeTreeDataPart::SERIALIZATION_FILE_NAME, ""); + } + return rename_vector; } diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index cee5038ed21..fb3592a1541 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -59,7 +59,7 @@ void appendColumnNameWithoutAlias(const ActionsDAG::Node & node, WriteBuffer & o { auto name = node.function_base->getName(); if (legacy && name == "modulo") - writeCString("moduleLegacy", out); + writeCString("moduloLegacy", out); else writeString(name, out); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index 557123ddae2..c859c994818 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -60,11 +60,11 @@ void ReplicatedMergeTreeAttachThread::run() if (needs_retry) { - LOG_ERROR(log, "Initialization failed. Error: {}", e.message()); + LOG_ERROR(log, "Initialization failed. 
Error: {}", getCurrentExceptionMessage(/* with_stacktrace */ true)); } else { - LOG_ERROR(log, "Initialization failed, table will remain readonly. Error: {}", e.message()); + LOG_ERROR(log, "Initialization failed, table will remain readonly. Error: {}", getCurrentExceptionMessage(/* with_stacktrace */ true)); storage.initialization_done = true; } } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 4eb454e5156..77972b67644 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -39,11 +39,16 @@ #include #include +#include +#include #include #include #include +#include +#include #include #include +#include #include #include @@ -55,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -69,12 +75,14 @@ #include #include +#include #include #include #include #include +#include #include #include #include @@ -138,6 +146,7 @@ namespace ErrorCodes extern const int DISTRIBUTED_TOO_MANY_PENDING_BYTES; extern const int ARGUMENT_OUT_OF_BOUND; extern const int TOO_LARGE_DISTRIBUTED_DEPTH; + extern const int DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED; } namespace ActionLocks @@ -634,12 +643,278 @@ StorageSnapshotPtr StorageDistributed::getStorageSnapshotForQuery( namespace { -QueryTreeNodePtr buildQueryTreeDistributedTableReplacedWithLocalTable(const SelectQueryInfo & query_info, +/// Visitor that collect column source to columns mapping from query and all subqueries +class CollectColumnSourceToColumnsVisitor : public InDepthQueryTreeVisitor +{ +public: + struct Columns + { + NameSet column_names; + NamesAndTypes columns; + + void addColumn(NameAndTypePair column) + { + if (column_names.contains(column.name)) + return; + + column_names.insert(column.name); + columns.push_back(std::move(column)); + } + }; + + const std::unordered_map & getColumnSourceToColumns() const + { + return column_source_to_columns; + } + + void visitImpl(QueryTreeNodePtr & node) + { + auto * column_node = node->as(); + if (!column_node) + return; + + auto column_source = column_node->getColumnSourceOrNull(); + if (!column_source) + return; + + auto it = column_source_to_columns.find(column_source); + if (it == column_source_to_columns.end()) + { + auto [insert_it, _] = column_source_to_columns.emplace(column_source, Columns()); + it = insert_it; + } + + it->second.addColumn(column_node->getColumn()); + } + +private: + std::unordered_map column_source_to_columns; +}; + +/** Visitor that rewrites IN and JOINs in query and all subqueries according to distributed_product_mode and + * prefer_global_in_and_join settings. + * + * Additionally collects GLOBAL JOIN and GLOBAL IN query nodes. + * + * If distributed_product_mode = deny, then visitor throws exception if there are multiple distributed tables. + * If distributed_product_mode = local, then visitor collects replacement map for tables that must be replaced + * with local tables. + * If distributed_product_mode = global or prefer_global_in_and_join setting is true, then visitor rewrites JOINs and IN functions that + * contain distributed tables to GLOBAL JOINs and GLOBAL IN functions. + * If distributed_product_mode = allow, then visitor does not rewrite query if there are multiple distributed tables. 
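  *
  * Illustrative example (not from this patch), for two sharded tables `dist_a` and `dist_b`:
  *     SELECT ... FROM dist_a WHERE key IN (SELECT key FROM dist_b)
  * - deny:   an exception is thrown (double-distributed IN/JOIN subquery);
  * - local:  `dist_b` inside the subquery is replaced with its underlying local table;
  * - global: the IN is rewritten to GLOBAL IN and the subquery result is later shipped
  *           to the shards as a temporary table;
  * - allow:  the query is left unchanged.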
+ */ +class DistributedProductModeRewriteInJoinVisitor : public InDepthQueryTreeVisitorWithContext +{ +public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + + explicit DistributedProductModeRewriteInJoinVisitor(const ContextPtr & context_) + : Base(context_) + {} + + struct InFunctionOrJoin + { + QueryTreeNodePtr query_node; + size_t subquery_depth = 0; + }; + + const std::unordered_map & getReplacementMap() const + { + return replacement_map; + } + + const std::vector & getGlobalInOrJoinNodes() const + { + return global_in_or_join_nodes; + } + + static bool needChildVisit(QueryTreeNodePtr & parent, QueryTreeNodePtr & child) + { + auto * function_node = parent->as(); + if (function_node && isNameOfGlobalInFunction(function_node->getFunctionName())) + return false; + + auto * join_node = parent->as(); + if (join_node && join_node->getLocality() == JoinLocality::Global && join_node->getRightTableExpression() == child) + return false; + + return true; + } + + void visitImpl(QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + auto * join_node = node->as(); + + if ((function_node && isNameOfGlobalInFunction(function_node->getFunctionName())) || + (join_node && join_node->getLocality() == JoinLocality::Global)) + { + InFunctionOrJoin in_function_or_join_entry; + in_function_or_join_entry.query_node = node; + in_function_or_join_entry.subquery_depth = getSubqueryDepth(); + global_in_or_join_nodes.push_back(std::move(in_function_or_join_entry)); + return; + } + + if ((function_node && isNameOfLocalInFunction(function_node->getFunctionName())) || + (join_node && join_node->getLocality() != JoinLocality::Global)) + { + InFunctionOrJoin in_function_or_join_entry; + in_function_or_join_entry.query_node = node; + in_function_or_join_entry.subquery_depth = getSubqueryDepth(); + in_function_or_join_stack.push_back(in_function_or_join_entry); + return; + } + + if (node->getNodeType() == QueryTreeNodeType::TABLE) + tryRewriteTableNodeIfNeeded(node); + } + + void leaveImpl(QueryTreeNodePtr & node) + { + if (!in_function_or_join_stack.empty() && node.get() == in_function_or_join_stack.back().query_node.get()) + in_function_or_join_stack.pop_back(); + } + +private: + void tryRewriteTableNodeIfNeeded(const QueryTreeNodePtr & table_node) + { + const auto & table_node_typed = table_node->as(); + const auto * distributed_storage = typeid_cast(table_node_typed.getStorage().get()); + if (!distributed_storage) + return; + + bool distributed_valid_for_rewrite = distributed_storage->getShardCount() >= 2; + if (!distributed_valid_for_rewrite) + return; + + auto distributed_product_mode = getSettings().distributed_product_mode; + + if (distributed_product_mode == DistributedProductMode::LOCAL) + { + StorageID remote_storage_id = StorageID{distributed_storage->getRemoteDatabaseName(), + distributed_storage->getRemoteTableName()}; + auto resolved_remote_storage_id = getContext()->resolveStorageID(remote_storage_id); + const auto & distributed_storage_columns = table_node_typed.getStorageSnapshot()->metadata->getColumns(); + auto storage = std::make_shared(resolved_remote_storage_id, distributed_storage_columns); + auto replacement_table_expression = std::make_shared(std::move(storage), getContext()); + replacement_map.emplace(table_node.get(), std::move(replacement_table_expression)); + } + else if ((distributed_product_mode == DistributedProductMode::GLOBAL || getSettings().prefer_global_in_and_join) && + !in_function_or_join_stack.empty()) + { + auto * in_or_join_node_to_modify 
= in_function_or_join_stack.back().query_node.get(); + + if (auto * in_function_to_modify = in_or_join_node_to_modify->as()) + { + auto global_in_function_name = getGlobalInFunctionNameForLocalInFunctionName(in_function_to_modify->getFunctionName()); + auto global_in_function_resolver = FunctionFactory::instance().get(global_in_function_name, getContext()); + in_function_to_modify->resolveAsFunction(global_in_function_resolver->build(in_function_to_modify->getArgumentColumns())); + } + else if (auto * join_node_to_modify = in_or_join_node_to_modify->as()) + { + join_node_to_modify->setLocality(JoinLocality::Global); + } + + global_in_or_join_nodes.push_back(in_function_or_join_stack.back()); + } + else if (distributed_product_mode == DistributedProductMode::ALLOW) + { + return; + } + else if (distributed_product_mode == DistributedProductMode::DENY) + { + throw Exception(ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED, + "Double-distributed IN/JOIN subqueries is denied (distributed_product_mode = 'deny'). " + "You may rewrite query to use local tables " + "in subqueries, or use GLOBAL keyword, or set distributed_product_mode to suitable value."); + } + } + + std::vector in_function_or_join_stack; + std::unordered_map replacement_map; + std::vector global_in_or_join_nodes; +}; + +/** Execute subquery node and put result in mutable context temporary table. + * Returns table node that is initialized with temporary table storage. + */ +QueryTreeNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, + ContextMutablePtr & mutable_context, + size_t subquery_depth) +{ + auto subquery_hash = subquery_node->getTreeHash(); + String temporary_table_name = fmt::format("_data_{}_{}", subquery_hash.first, subquery_hash.second); + + const auto & external_tables = mutable_context->getExternalTables(); + auto external_table_it = external_tables.find(temporary_table_name); + if (external_table_it != external_tables.end()) + { + auto temporary_table_expression_node = std::make_shared(external_table_it->second, mutable_context); + temporary_table_expression_node->setTemporaryTableName(temporary_table_name); + return temporary_table_expression_node; + } + + auto subquery_options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth, true /*is_subquery*/); + auto context_copy = Context::createCopy(mutable_context); + updateContextForSubqueryExecution(context_copy); + + InterpreterSelectQueryAnalyzer interpreter(subquery_node, context_copy, subquery_options); + auto & query_plan = interpreter.getQueryPlan(); + + auto sample_block_with_unique_names = query_plan.getCurrentDataStream().header; + makeUniqueColumnNamesInBlock(sample_block_with_unique_names); + + if (!blocksHaveEqualStructure(sample_block_with_unique_names, query_plan.getCurrentDataStream().header)) + { + auto actions_dag = ActionsDAG::makeConvertingActions( + query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), + sample_block_with_unique_names.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(actions_dag)); + query_plan.addStep(std::move(converting_step)); + } + + Block sample = interpreter.getSampleBlock(); + NamesAndTypesList columns = sample.getNamesAndTypesList(); + + auto external_storage_holder = TemporaryTableHolder( + mutable_context, + ColumnsDescription{columns}, + ConstraintsDescription{}, + nullptr /*query*/, + true /*create_for_global_subquery*/); + + StoragePtr external_storage = 
external_storage_holder.getTable(); + auto temporary_table_expression_node = std::make_shared(external_storage, mutable_context); + temporary_table_expression_node->setTemporaryTableName(temporary_table_name); + + auto table_out = external_storage->write({}, external_storage->getInMemoryMetadataPtr(), mutable_context); + auto io = interpreter.execute(); + io.pipeline.complete(std::move(table_out)); + CompletedPipelineExecutor executor(io.pipeline); + executor.execute(); + + mutable_context->addExternalTable(temporary_table_name, std::move(external_storage_holder)); + + return temporary_table_expression_node; +} + +QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info, const StorageSnapshotPtr & distributed_storage_snapshot, const StorageID & remote_storage_id, const ASTPtr & remote_table_function) { - const auto & query_context = query_info.planner_context->getQueryContext(); + auto & planner_context = query_info.planner_context; + const auto & query_context = planner_context->getQueryContext(); + + std::optional table_expression_modifiers; + + if (auto * query_info_table_node = query_info.table_expression->as()) + table_expression_modifiers = query_info_table_node->getTableExpressionModifiers(); + else if (auto * query_info_table_function_node = query_info.table_expression->as()) + table_expression_modifiers = query_info_table_function_node->getTableExpressionModifiers(); QueryTreeNodePtr replacement_table_expression; @@ -651,6 +926,9 @@ QueryTreeNodePtr buildQueryTreeDistributedTableReplacedWithLocalTable(const Sele auto table_function_node = std::make_shared(remote_table_function_node.getFunctionName()); table_function_node->getArgumentsNode() = remote_table_function_node.getArgumentsNode(); + if (table_expression_modifiers) + table_function_node->setTableExpressionModifiers(*table_expression_modifiers); + QueryAnalysisPass query_analysis_pass; query_analysis_pass.run(table_function_node, query_context); @@ -660,13 +938,89 @@ QueryTreeNodePtr buildQueryTreeDistributedTableReplacedWithLocalTable(const Sele { auto resolved_remote_storage_id = query_context->resolveStorageID(remote_storage_id); auto storage = std::make_shared(resolved_remote_storage_id, distributed_storage_snapshot->metadata->getColumns()); + auto table_node = std::make_shared(std::move(storage), query_context); - replacement_table_expression = std::make_shared(std::move(storage), query_context); + if (table_expression_modifiers) + table_node->setTableExpressionModifiers(*table_expression_modifiers); + + replacement_table_expression = std::move(table_node); } replacement_table_expression->setAlias(query_info.table_expression->getAlias()); - return query_info.query_tree->cloneAndReplace(query_info.table_expression, std::move(replacement_table_expression)); + auto query_tree_to_modify = query_info.query_tree->cloneAndReplace(query_info.table_expression, std::move(replacement_table_expression)); + + CollectColumnSourceToColumnsVisitor collect_column_source_to_columns_visitor; + collect_column_source_to_columns_visitor.visit(query_tree_to_modify); + + const auto & column_source_to_columns = collect_column_source_to_columns_visitor.getColumnSourceToColumns(); + + DistributedProductModeRewriteInJoinVisitor visitor(query_info.planner_context->getQueryContext()); + visitor.visit(query_tree_to_modify); + + auto replacement_map = visitor.getReplacementMap(); + const auto & global_in_or_join_nodes = visitor.getGlobalInOrJoinNodes(); + + for (const auto & global_in_or_join_node : global_in_or_join_nodes) + { + if 
(auto * join_node = global_in_or_join_node.query_node->as()) + { + auto join_right_table_expression = join_node->getRightTableExpression(); + auto join_right_table_expression_node_type = join_right_table_expression->getNodeType(); + + QueryTreeNodePtr subquery_node; + + if (join_right_table_expression_node_type == QueryTreeNodeType::QUERY || + join_right_table_expression_node_type == QueryTreeNodeType::UNION) + { + subquery_node = join_right_table_expression; + } + else if (join_right_table_expression_node_type == QueryTreeNodeType::TABLE || + join_right_table_expression_node_type == QueryTreeNodeType::TABLE_FUNCTION) + { + const auto & columns = column_source_to_columns.at(join_right_table_expression).columns; + subquery_node = buildSubqueryToReadColumnsFromTableExpression(columns, + join_right_table_expression, + planner_context->getQueryContext()); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected JOIN right table expression to be table, table function, query or union node. Actual {}", + join_right_table_expression->formatASTForErrorMessage()); + } + + auto temporary_table_expression_node = executeSubqueryNode(subquery_node, + planner_context->getMutableQueryContext(), + global_in_or_join_node.subquery_depth); + temporary_table_expression_node->setAlias(join_right_table_expression->getAlias()); + replacement_map.emplace(join_right_table_expression.get(), std::move(temporary_table_expression_node)); + continue; + } + else if (auto * in_function_node = global_in_or_join_node.query_node->as()) + { + auto & in_function_subquery_node = in_function_node->getArguments().getNodes().at(1); + auto in_function_node_type = in_function_subquery_node->getNodeType(); + if (in_function_node_type != QueryTreeNodeType::QUERY && in_function_node_type != QueryTreeNodeType::UNION) + continue; + + auto temporary_table_expression_node = executeSubqueryNode(in_function_subquery_node, + planner_context->getMutableQueryContext(), + global_in_or_join_node.subquery_depth); + in_function_subquery_node = std::move(temporary_table_expression_node); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected global IN or JOIN query node. 
Actual {}", + global_in_or_join_node.query_node->formatASTForErrorMessage()); + } + } + + if (!replacement_map.empty()) + query_tree_to_modify = query_tree_to_modify->cloneAndReplace(replacement_map); + + return query_tree_to_modify; } } @@ -694,17 +1048,13 @@ void StorageDistributed::read( if (!remote_table_function_ptr) remote_storage_id = StorageID{remote_database, remote_table}; - auto query_tree_with_replaced_distributed_table = buildQueryTreeDistributedTableReplacedWithLocalTable(query_info, + auto query_tree_distributed = buildQueryTreeDistributed(query_info, storage_snapshot, remote_storage_id, remote_table_function_ptr); - query_ast = queryNodeToSelectQuery(query_tree_with_replaced_distributed_table); - - Planner planner(query_tree_with_replaced_distributed_table, SelectQueryOptions(processed_stage).analyze()); - planner.buildQueryPlanIfNeeded(); - - header = planner.getQueryPlan().getCurrentDataStream().header; + query_ast = queryNodeToSelectQuery(query_tree_distributed); + header = InterpreterSelectQueryAnalyzer::getSampleBlock(query_ast, local_context, SelectQueryOptions(processed_stage).analyze()); } else { diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 881cbc18b10..5df644bf48e 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -192,6 +193,83 @@ private: }; +class ReadFromMemoryStorageStep final : public ISourceStep +{ +public: + explicit ReadFromMemoryStorageStep(Pipe pipe_) : + ISourceStep(DataStream{.header = pipe_.getHeader()}), + pipe(std::move(pipe_)) + { + } + + ReadFromMemoryStorageStep() = delete; + ReadFromMemoryStorageStep(const ReadFromMemoryStorageStep &) = delete; + ReadFromMemoryStorageStep & operator=(const ReadFromMemoryStorageStep &) = delete; + + ReadFromMemoryStorageStep(ReadFromMemoryStorageStep &&) = default; + ReadFromMemoryStorageStep & operator=(ReadFromMemoryStorageStep &&) = default; + + String getName() const override { return name; } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override + { + // use move - make sure that the call will only be made once. + pipeline.init(std::move(pipe)); + } + + static Pipe makePipe(const Names & columns_to_read_, + const StorageSnapshotPtr & storage_snapshot_, + size_t num_streams_, + const bool delay_read_for_global_subqueries_) + { + storage_snapshot_->check(columns_to_read_); + + const auto & snapshot_data = assert_cast(*storage_snapshot_->data); + auto current_data = snapshot_data.blocks; + + if (delay_read_for_global_subqueries_) + { + /// Note: for global subquery we use single source. + /// Mainly, the reason is that at this point table is empty, + /// and we don't know the number of blocks are going to be inserted into it. + /// + /// It may seem to be not optimal, but actually data from such table is used to fill + /// set for IN or hash table for JOIN, which can't be done concurrently. + /// Since no other manipulation with data is done, multiple sources shouldn't give any profit. 
+ + return Pipe(std::make_shared( + columns_to_read_, + storage_snapshot_, + nullptr /* data */, + nullptr /* parallel execution index */, + [current_data](std::shared_ptr & data_to_initialize) + { + data_to_initialize = current_data; + })); + } + + size_t size = current_data->size(); + + if (num_streams_ > size) + num_streams_ = size; + + Pipes pipes; + + auto parallel_execution_index = std::make_shared>(0); + + for (size_t stream = 0; stream < num_streams_; ++stream) + { + pipes.emplace_back(std::make_shared(columns_to_read_, storage_snapshot_, current_data, parallel_execution_index)); + } + return Pipe::unitePipes(std::move(pipes)); + } + +private: + static constexpr auto name = "ReadFromMemoryStorage"; + Pipe pipe; +}; + + StorageMemory::StorageMemory( const StorageID & table_id_, ColumnsDescription columns_description_, @@ -233,47 +311,29 @@ Pipe StorageMemory::read( size_t /*max_block_size*/, size_t num_streams) { - storage_snapshot->check(column_names); + return ReadFromMemoryStorageStep::makePipe(column_names, storage_snapshot, num_streams, delay_read_for_global_subqueries); +} - const auto & snapshot_data = assert_cast(*storage_snapshot->data); - auto current_data = snapshot_data.blocks; - - if (delay_read_for_global_subqueries) +void StorageMemory::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) +{ + // @TODO it looks like IStorage::readFromPipe. different only step's type. + auto pipe = read(column_names, storage_snapshot, query_info, context, processed_stage, max_block_size, num_streams); + if (pipe.empty()) { - /// Note: for global subquery we use single source. - /// Mainly, the reason is that at this point table is empty, - /// and we don't know the number of blocks are going to be inserted into it. - /// - /// It may seem to be not optimal, but actually data from such table is used to fill - /// set for IN or hash table for JOIN, which can't be done concurrently. - /// Since no other manipulation with data is done, multiple sources shouldn't give any profit. 
- - return Pipe(std::make_shared( - column_names, - storage_snapshot, - nullptr /* data */, - nullptr /* parallel execution index */, - [current_data](std::shared_ptr & data_to_initialize) - { - data_to_initialize = current_data; - })); + auto header = storage_snapshot->getSampleBlockForColumns(column_names); + InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info, context); + return; } - - size_t size = current_data->size(); - - if (num_streams > size) - num_streams = size; - - Pipes pipes; - - auto parallel_execution_index = std::make_shared>(0); - - for (size_t stream = 0; stream < num_streams; ++stream) - { - pipes.emplace_back(std::make_shared(column_names, storage_snapshot, current_data, parallel_execution_index)); - } - - return Pipe::unitePipes(std::move(pipes)); + auto read_step = std::make_unique(std::move(pipe)); + query_plan.addStep(std::move(read_step)); } diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index c739088dbe4..fcaadaff3ba 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -53,6 +53,16 @@ public: size_t max_block_size, size_t num_streams) override; + void read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + bool supportsParallelInsert() const override { return true; } bool supportsSubcolumns() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index aadd7b8c20a..d9bb189524c 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -223,8 +224,12 @@ void StorageMergeTree::read( auto cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas); - Block header = - InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + Block header; + + if (local_context->getSettingsRef().allow_experimental_analyzer) + header = InterpreterSelectQueryAnalyzer::getSampleBlock(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()); + else + header = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory( diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index c7535bb4550..a3fd24b4f98 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -102,7 +103,20 @@ Pipe StorageS3Cluster::read( auto extension = getTaskIteratorExtension(query_info.query, context); /// Calculate the header. 
This is significant, because some columns could be thrown away in some cases like query with count(*) - auto interpreter = InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze()); + + Block sample_block; + ASTPtr query_to_send = query_info.query; + + if (context->getSettingsRef().allow_experimental_analyzer) + { + sample_block = InterpreterSelectQueryAnalyzer::getSampleBlock(query_info.query, context, SelectQueryOptions(processed_stage)); + } + else + { + auto interpreter = InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze()); + sample_block = interpreter.getSampleBlock(); + query_to_send = interpreter.getQueryInfo().query->clone(); + } const Scalars & scalars = context->hasQueryContext() ? context->getQueryContext()->getScalars() : Scalars{}; @@ -110,7 +124,6 @@ Pipe StorageS3Cluster::read( const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; - ASTPtr query_to_send = interpreter.getQueryInfo().query->clone(); if (!structure_argument_was_provided) addColumnsStructureToQueryWithClusterEngine( query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()), 5, getName()); @@ -136,7 +149,7 @@ Pipe StorageS3Cluster::read( shard_info.pool, std::vector{try_result}, queryToString(query_to_send), - interpreter.getSampleBlock(), + sample_block, context, /*throttler=*/nullptr, scalars, diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index c0ddb0bc48a..f6b0cdd8389 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include @@ -218,6 +220,15 @@ namespace uri_options.size() == 1, download_threads); + try + { + total_size += getFileSizeFromReadBuffer(*read_buf); + } + catch (...) 
+ { + // we simply continue without total_size + } + auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings); QueryPipelineBuilder builder; @@ -258,7 +269,14 @@ namespace Chunk chunk; if (reader->pull(chunk)) + { + UInt64 num_rows = chunk.getNumRows(); + if (num_rows && total_size) + updateRowsProgressApprox( + *this, chunk, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); + return chunk; + } pipeline->reset(); reader.reset(); @@ -448,6 +466,11 @@ namespace std::unique_ptr reader; Poco::Net::HTTPBasicCredentials credentials; + + size_t total_size = 0; + UInt64 total_rows_approx_max = 0; + size_t total_rows_count_times = 0; + UInt64 total_rows_approx_accumulated = 0; }; } diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 1a7050b4dff..7e12a972768 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -117,6 +118,10 @@ StorageView::StorageView( SelectQueryDescription description; description.inner_query = query.select->ptr(); + + NormalizeSelectWithUnionQueryVisitor::Data data{SetOperationMode::Unspecified}; + NormalizeSelectWithUnionQueryVisitor{data}.visit(description.inner_query); + is_parameterized_view = query.isParameterizedView(); parameter_types = analyzeReceiveQueryParamsWithType(description.inner_query); storage_metadata.setSelectQuery(description); @@ -167,7 +172,7 @@ void StorageView::read( query_plan.addStep(std::move(materializing)); /// And also convert to expected structure. - const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names,parameter_values); + const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names, parameter_values); const auto & header = query_plan.getCurrentDataStream().header; const auto * select_with_union = current_inner_query->as(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 3a74fd5fc75..3471e4ea6bf 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -78,6 +78,7 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; extern const int TABLE_WAS_NOT_DROPPED; extern const int NOT_IMPLEMENTED; + extern const int UNSUPPORTED_METHOD; } namespace @@ -1158,6 +1159,10 @@ StorageWindowView::StorageWindowView( , fire_signal_timeout_s(context_->getSettingsRef().wait_for_window_view_fire_signal_timeout.totalSeconds()) , clean_interval_usec(context_->getSettingsRef().window_view_clean_interval.totalMicroseconds()) { + if (context_->getSettingsRef().allow_experimental_analyzer) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Experimental WINDOW VIEW feature is not supported with new infrastructure for query analysis (the setting 'allow_experimental_analyzer')"); + if (!query.select) throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); diff --git a/src/TableFunctions/TableFunctionMerge.cpp b/src/TableFunctions/TableFunctionMerge.cpp index 066caa8170d..586cee54085 100644 --- a/src/TableFunctions/TableFunctionMerge.cpp +++ b/src/TableFunctions/TableFunctionMerge.cpp @@ -53,7 +53,7 @@ std::vector TableFunctionMerge::skipAnalysisForArguments(const QueryTree result.push_back(i); } - return {0}; + return result; } void TableFunctionMerge::parseArguments(const ASTPtr & ast_function, ContextPtr context) 
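The two table-function changes in this area serve the same purpose: TableFunctionMerge now returns the full list of argument indexes to skip instead of always {0}, and TableFunctionURL (below) skips analysis of a headers(...) argument, so that the new analyzer does not try to resolve such arguments as ordinary expressions. A rough SQL sketch of the kind of calls affected; the table-name regexp, URL, and header value are made-up placeholders rather than anything from the patch, and the headers('Name'='Value') form is assumed:

-- merge() takes a database and a table-name regexp; the regexp is a plain string and must not be resolved as an identifier (placeholder regexp).
SELECT count() FROM merge(currentDatabase(), '^metrics_');

-- url() may carry a headers(...) clause; it only looks like a function call, so its analysis is skipped (placeholder URL and header).
SELECT * FROM url('https://example.invalid/data.csv', CSV, 'id UInt64, value Float64', headers('Accept'='text/csv')) LIMIT 10;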
diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index 5de6c6b4ccc..cc3a858e4dc 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include #include @@ -26,6 +28,24 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +std::vector TableFunctionURL::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const +{ + auto & table_function_node = query_node_table_function->as(); + auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); + size_t table_function_arguments_size = table_function_arguments_nodes.size(); + + std::vector result; + + for (size_t i = 0; i < table_function_arguments_size; ++i) + { + auto * function_node = table_function_arguments_nodes[i]->as(); + if (function_node && function_node->getFunctionName() == "headers") + result.push_back(i); + } + + return result; +} + void TableFunctionURL::parseArguments(const ASTPtr & ast, ContextPtr context) { const auto & ast_function = assert_cast(ast.get()); diff --git a/src/TableFunctions/TableFunctionURL.h b/src/TableFunctions/TableFunctionURL.h index a670bdc0682..dca5123fb69 100644 --- a/src/TableFunctions/TableFunctionURL.h +++ b/src/TableFunctions/TableFunctionURL.h @@ -12,7 +12,7 @@ class Context; /* url(source, format[, structure, compression]) - creates a temporary storage from url. */ -class TableFunctionURL : public ITableFunctionFileLike +class TableFunctionURL final: public ITableFunctionFileLike { public: static constexpr auto name = "url"; @@ -23,10 +23,11 @@ public: ColumnsDescription getActualTableStructure(ContextPtr context) const override; -protected: +private: + std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; + void parseArguments(const ASTPtr & ast, ContextPtr context) override; -private: StoragePtr getStorage( const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, const std::string & table_name, const String & compression_method_) const override; diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 4f723dba101..5e151e6c098 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -30,13 +30,16 @@ def get_options(i, upgrade_check): if i % 2 == 1: join_alg_num = i // 2 - if join_alg_num % 4 == 0: + if join_alg_num % 5 == 0: client_options.append("join_algorithm='parallel_hash'") - if join_alg_num % 4 == 1: + if join_alg_num % 5 == 1: client_options.append("join_algorithm='partial_merge'") - if join_alg_num % 4 == 2: + if join_alg_num % 5 == 2: client_options.append("join_algorithm='full_sorting_merge'") - if join_alg_num % 4 == 3: + if join_alg_num % 5 == 3 and not upgrade_check: + # Some crashes are not fixed in 23.2 yet, so ignore the setting in Upgrade check + client_options.append("join_algorithm='grace_hash'") + if join_alg_num % 5 == 4: client_options.append("join_algorithm='auto'") client_options.append("max_rows_in_join=1000") @@ -222,6 +225,20 @@ def prepare_for_hung_check(drop_databases): return True +def is_ubsan_build(): + try: + query = """clickhouse client -q "SELECT value FROM system.build_options WHERE name = 'CXX_FLAGS'" """ + output = ( + check_output(query, shell=True, stderr=STDOUT, timeout=30) + .decode("utf-8") + .strip() + ) + return "-fsanitize=undefined" in output + except Exception as e: + logging.info("Failed to get build 
flags: %s", str(e)) + return False + + if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") parser = argparse.ArgumentParser( @@ -241,6 +258,10 @@ if __name__ == "__main__": args = parser.parse_args() if args.drop_databases and not args.hung_check: raise Exception("--drop-databases only used in hung check (--hung-check)") + + # FIXME Hung check with ubsan is temporarily disabled due to https://github.com/ClickHouse/ClickHouse/issues/45372 + suppress_hung_check = is_ubsan_build() + func_pipes = [] func_pipes = run_func_test( args.test_cmd, @@ -305,7 +326,7 @@ if __name__ == "__main__": res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT) if tee.stdin is not None: tee.stdin.close() - if res != 0 and have_long_running_queries: + if res != 0 and have_long_running_queries and not suppress_hung_check: logging.info("Hung check failed with exit code %d", res) else: hung_check_status = "No queries hung\tOK\t\\N\t\n" diff --git a/tests/config/config.d/keeper_port.xml b/tests/config/config.d/keeper_port.xml index 2066dedfa56..cffd325e968 100644 --- a/tests/config/config.d/keeper_port.xml +++ b/tests/config/config.d/keeper_port.xml @@ -9,8 +9,7 @@ 10000 100000 10000 - - true + false 240000 1000000000000000 diff --git a/tests/integration/test_async_insert_memory/test.py b/tests/integration/test_async_insert_memory/test.py deleted file mode 100644 index 279542f087c..00000000000 --- a/tests/integration/test_async_insert_memory/test.py +++ /dev/null @@ -1,40 +0,0 @@ -import pytest - -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) - -node = cluster.add_instance("node") - - -@pytest.fixture(scope="module", autouse=True) -def start_cluster(): - try: - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -def test_memory_usage(): - node.query( - "CREATE TABLE async_table(data Array(UInt64)) ENGINE=MergeTree() ORDER BY data" - ) - - node.get_query_request("SELECT count() FROM system.numbers") - - INSERT_QUERY = "INSERT INTO async_table SETTINGS async_insert=1, wait_for_async_insert=1 VALUES ({})" - for iter in range(10): - values = list(range(iter * 5000000, (iter + 1) * 5000000)) - node.query(INSERT_QUERY.format(values)) - - response = node.get_query_request( - "SELECT groupArray(number) FROM numbers(1000000) SETTINGS max_memory_usage_for_user={}".format( - 30 * (2**23) - ) - ) - - _, err = response.get_answer_and_error() - assert err == "", "Query failed with error {}".format(err) - - node.query("DROP TABLE async_table") diff --git a/tests/integration/test_backward_compatibility/test_convert_ordinary.py b/tests/integration/test_backward_compatibility/test_convert_ordinary.py index afc44c91fc4..8b1afd358eb 100644 --- a/tests/integration/test_backward_compatibility/test_convert_ordinary.py +++ b/tests/integration/test_backward_compatibility/test_convert_ordinary.py @@ -188,7 +188,18 @@ def check_convert_all_dbs_to_atomic(): node.exec_in_container( ["bash", "-c", f"touch /var/lib/clickhouse/flags/convert_ordinary_to_atomic"] ) - node.restart_clickhouse() + node.stop_clickhouse() + cannot_start = False + try: + node.start_clickhouse() + except: + cannot_start = True + assert cannot_start + + node.exec_in_container( + ["bash", "-c", f"rm /var/lib/clickhouse/flags/convert_ordinary_to_atomic"] + ) + node.start_clickhouse() assert "Ordinary\n" == node.query( "SELECT engine FROM system.databases where name='ordinary'" diff --git a/tests/queries/0_stateless/00002_system_numbers.sql 
b/tests/queries/0_stateless/00002_system_numbers.sql index 95f75573201..d5934c7d387 100644 --- a/tests/queries/0_stateless/00002_system_numbers.sql +++ b/tests/queries/0_stateless/00002_system_numbers.sql @@ -6,7 +6,7 @@ SELECT number FROM system.numbers WHERE number >= 5 LIMIT 2; SELECT * FROM system.numbers WHERE number == 7 LIMIT 1; SELECT number AS n FROM system.numbers WHERE number IN(8, 9) LIMIT 2; select number from system.numbers limit 0; -select x from system.numbers limit 1; -- { clientError 0 serverError 47 } +select x from system.numbers limit 1; -- { serverError UNKNOWN_IDENTIFIER } SELECT x, number FROM system.numbers LIMIT 1; -- { serverError 47 } SELECT * FROM system.number LIMIT 1; -- { serverError 60 } SELECT * FROM system LIMIT 1; -- { serverError 60 } diff --git a/tests/queries/0_stateless/00202_cross_join.reference b/tests/queries/0_stateless/00202_cross_join.reference index 122cf0a6e06..a8db281730a 100644 --- a/tests/queries/0_stateless/00202_cross_join.reference +++ b/tests/queries/0_stateless/00202_cross_join.reference @@ -13,3 +13,33 @@ 2 2 2 3 2 4 +0 0 +0 1 +0 2 +0 3 +0 4 +1 0 +1 1 +1 2 +1 3 +1 4 +2 0 +2 1 +2 2 +2 3 +2 4 +0 0 +0 1 +0 2 +0 3 +0 4 +1 0 +1 1 +1 2 +1 3 +1 4 +2 0 +2 1 +2 2 +2 3 +2 4 diff --git a/tests/queries/0_stateless/00202_cross_join.sql b/tests/queries/0_stateless/00202_cross_join.sql index ed435d90021..8d62c56b3f1 100644 --- a/tests/queries/0_stateless/00202_cross_join.sql +++ b/tests/queries/0_stateless/00202_cross_join.sql @@ -1 +1,7 @@ SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN (SELECT number AS y FROM system.numbers LIMIT 5) js2; + +SET join_algorithm = 'auto'; +SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN (SELECT number AS y FROM system.numbers LIMIT 5) js2; + +SET allow_experimental_analyzer = 1; +SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN (SELECT number AS y FROM system.numbers LIMIT 5) js2; diff --git a/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference b/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference index 5174c13a9e0..b8d51e5d078 100644 --- a/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference +++ b/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference @@ -48,10 +48,10 @@ { "i0": "0", "u0": "0", - "ip": "9223372036854775807", - "in": "-9223372036854775808", - "up": "18446744073709551615", - "arr": ["0"], + "ip": "0", + "in": "0", + "up": "0", + "arr": [], "tuple": ["0","0"] }, @@ -119,7 +119,7 @@ ["0", "0", "9223372036854775807", "-9223372036854775808", "18446744073709551615", ["0"], ["0","0"]] ], - "totals": ["0", "0", "9223372036854775807", "-9223372036854775808", "18446744073709551615", ["0"], ["0","0"]], + "totals": ["0", "0", "0", "0", "0", [], ["0","0"]], "extremes": { @@ -180,10 +180,10 @@ { "i0": 0, "u0": 0, - "ip": 9223372036854775807, - "in": -9223372036854775808, - "up": 18446744073709551615, - "arr": [0], + "ip": 0, + "in": 0, + "up": 0, + "arr": [], "tuple": [0,0] }, @@ -251,7 +251,7 @@ [0, 0, 9223372036854775807, -9223372036854775808, 18446744073709551615, [0], [0,0]] ], - "totals": [0, 0, 9223372036854775807, -9223372036854775808, 18446744073709551615, [0], [0,0]], + "totals": [0, 0, 0, 0, 0, [], [0,0]], "extremes": { diff --git a/tests/queries/0_stateless/00378_json_quote_64bit_integers.sql b/tests/queries/0_stateless/00378_json_quote_64bit_integers.sql index 3a70b64bc86..e7b59bc3f7f 100644 --- 
a/tests/queries/0_stateless/00378_json_quote_64bit_integers.sql +++ b/tests/queries/0_stateless/00378_json_quote_64bit_integers.sql @@ -2,6 +2,7 @@ SET output_format_write_statistics = 0; SET extremes = 1; +SET allow_experimental_analyzer = 1; SET output_format_json_quote_64bit_integers = 1; SELECT toInt64(0) as i0, toUInt64(0) as u0, toInt64(9223372036854775807) as ip, toInt64(-9223372036854775808) as in, toUInt64(18446744073709551615) as up, [toInt64(0)] as arr, (toUInt64(0), toUInt64(0)) as tuple GROUP BY i0, u0, ip, in, up, arr, tuple WITH TOTALS FORMAT JSON; diff --git a/tests/queries/0_stateless/00386_has_column_in_table.sql b/tests/queries/0_stateless/00386_has_column_in_table.sql index d543bb42ca7..7347293e05b 100644 --- a/tests/queries/0_stateless/00386_has_column_in_table.sql +++ b/tests/queries/0_stateless/00386_has_column_in_table.sql @@ -21,11 +21,11 @@ SELECT hasColumnInTable('localhost', currentDatabase(), 'has_column_in_table', ' SELECT hasColumnInTable('system', 'one', ''); /* bad queries */ -SELECT hasColumnInTable('', '', ''); -- { serverError 60; } -SELECT hasColumnInTable('', 't', 'c'); -- { serverError 81; } -SELECT hasColumnInTable(currentDatabase(), '', 'c'); -- { serverError 60; } -SELECT hasColumnInTable('d', 't', 's'); -- { serverError 81; } -SELECT hasColumnInTable(currentDatabase(), 't', 's'); -- { serverError 60; } +SELECT hasColumnInTable('', '', ''); -- { serverError 60 } +SELECT hasColumnInTable('', 't', 'c'); -- { serverError 81 } +SELECT hasColumnInTable(currentDatabase(), '', 'c'); -- { serverError 60 } +SELECT hasColumnInTable('d', 't', 's'); -- { serverError 81 } +SELECT hasColumnInTable(currentDatabase(), 't', 's'); -- { serverError 60 } DROP TABLE has_column_in_table; diff --git a/tests/queries/0_stateless/00445_join_nullable_keys.reference b/tests/queries/0_stateless/00445_join_nullable_keys.reference index afc8003910c..cc1c06d593b 100644 --- a/tests/queries/0_stateless/00445_join_nullable_keys.reference +++ b/tests/queries/0_stateless/00445_join_nullable_keys.reference @@ -22,13 +22,13 @@ 13 13 14 14 \N 8 -0 0 -0 2 -0 4 -0 6 -0 8 1 1 3 3 5 5 7 7 9 9 +\N 0 +\N 2 +\N 4 +\N 6 +\N 8 diff --git a/tests/queries/0_stateless/00445_join_nullable_keys.sql b/tests/queries/0_stateless/00445_join_nullable_keys.sql index a0453356e98..774594f90f3 100644 --- a/tests/queries/0_stateless/00445_join_nullable_keys.sql +++ b/tests/queries/0_stateless/00445_join_nullable_keys.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer = 1; SET join_use_nulls = 0; SET any_join_distinct_right_table_keys = 1; diff --git a/tests/queries/0_stateless/00534_filimonov.data b/tests/queries/0_stateless/00534_filimonov.data index 911a8e4d1f3..eb4500877e5 100644 --- a/tests/queries/0_stateless/00534_filimonov.data +++ b/tests/queries/0_stateless/00534_filimonov.data @@ -276,10 +276,14 @@ SELECT runningDifference(CAST( 0 AS Nullable(Int8))); SELECT runningDifference(CAST( 0 AS Nullable(Int16))); SELECT runningDifference(CAST( 0 AS Nullable(Int32))); SELECT runningDifference(CAST( 0 AS Nullable(Int64))); +SELECT runningDifference(CAST( 0 AS Nullable(Int128))); +SELECT runningDifference(CAST( 0 AS Nullable(Int256))); SELECT runningDifference(CAST( 0 AS Nullable(UInt8))); SELECT runningDifference(CAST( 0 AS Nullable(UInt16))); SELECT runningDifference(CAST( 0 AS Nullable(UInt32))); SELECT runningDifference(CAST( 0 AS Nullable(UInt64))); +SELECT runningDifference(CAST( 0 AS Nullable(UInt128))); +SELECT runningDifference(CAST( 0 AS Nullable(UInt256))); SELECT runningDifference(CAST( 0 AS 
Nullable(Float32))); SELECT runningDifference(CAST( 0 AS Nullable(Float64))); SELECT runningDifference(CAST( 0 AS Nullable(Date))); @@ -288,10 +292,14 @@ SELECT runningDifference(CAST(NULL AS Nullable(Int8))); SELECT runningDifference(CAST(NULL AS Nullable(Int16))); SELECT runningDifference(CAST(NULL AS Nullable(Int32))); SELECT runningDifference(CAST(NULL AS Nullable(Int64))); +SELECT runningDifference(CAST(NULL AS Nullable(Int128))); +SELECT runningDifference(CAST(NULL AS Nullable(Int256))); SELECT runningDifference(CAST(NULL AS Nullable(UInt8))); SELECT runningDifference(CAST(NULL AS Nullable(UInt16))); SELECT runningDifference(CAST(NULL AS Nullable(UInt32))); SELECT runningDifference(CAST(NULL AS Nullable(UInt64))); +SELECT runningDifference(CAST(NULL AS Nullable(UInt128))); +SELECT runningDifference(CAST(NULL AS Nullable(UInt256))); SELECT runningDifference(CAST(NULL AS Nullable(Float32))); SELECT runningDifference(CAST(NULL AS Nullable(Float64))); SELECT runningDifference(CAST(NULL AS Nullable(Date))); diff --git a/tests/queries/0_stateless/00636_partition_key_parts_pruning.sh b/tests/queries/0_stateless/00636_partition_key_parts_pruning.sh index fdaecd87f53..7ec4d99f028 100755 --- a/tests/queries/0_stateless/00636_partition_key_parts_pruning.sh +++ b/tests/queries/0_stateless/00636_partition_key_parts_pruning.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query="SELECT '*** Single column partition key ***'" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS single_col_partition_key" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE single_col_partition_key(x UInt32) ENGINE MergeTree ORDER BY x PARTITION BY intDiv(x, 10)" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE single_col_partition_key(x UInt32) ENGINE MergeTree ORDER BY x PARTITION BY intDiv(x, 10) SETTINGS index_granularity=4" ${CLICKHOUSE_CLIENT} --query="INSERT INTO single_col_partition_key VALUES (1), (2), (3), (4), (11), (12), (20)" diff --git a/tests/queries/0_stateless/00646_url_engine.python b/tests/queries/0_stateless/00646_url_engine.python index c955eaff643..5f3b7546dd5 100644 --- a/tests/queries/0_stateless/00646_url_engine.python +++ b/tests/queries/0_stateless/00646_url_engine.python @@ -82,6 +82,10 @@ class CSVHTTPServer(BaseHTTPRequestHandler): self.wfile.write((', '.join(row) + '\n').encode()) return + def do_HEAD(self): + self._set_headers() + return + def read_chunk(self): msg = '' while True: diff --git a/tests/queries/0_stateless/00653_running_difference.reference b/tests/queries/0_stateless/00653_running_difference.reference index e2833e0bb3e..624ce92ce0f 100644 --- a/tests/queries/0_stateless/00653_running_difference.reference +++ b/tests/queries/0_stateless/00653_running_difference.reference @@ -19,6 +19,30 @@ \N \N 2 +- +0 +1 +4 +5 +170141183460469231731687303715884105717 +- +0 +1 +4 +5 +170141183460469231731687303715884105718 +- +0 +1 +4 +5 +170141183460469231731687303715884105717 +- +0 +1 +4 +5 +170141183460469231731687303715884105718 --Date Difference-- \N \N diff --git a/tests/queries/0_stateless/00653_running_difference.sql b/tests/queries/0_stateless/00653_running_difference.sql index f2b4a7300b2..1f18cfc42a7 100644 --- a/tests/queries/0_stateless/00653_running_difference.sql +++ b/tests/queries/0_stateless/00653_running_difference.sql @@ -5,6 +5,14 @@ select '-'; select runningDifference(x) from (select arrayJoin([Null, 1]) as x); select '-'; select runningDifference(x) from (select arrayJoin([Null, Null, 1, 3, Null, Null, 5]) as x); +select '-'; +select
runningDifference(x) from (select arrayJoin([0, 1, 5, 10, 170141183460469231731687303715884105727]::Array(UInt128)) as x); +select '-'; +select runningDifference(x) from (select arrayJoin([0, 1, 5, 10, 170141183460469231731687303715884105728]::Array(UInt256)) as x); +select '-'; +select runningDifference(x) from (select arrayJoin([0, 1, 5, 10, 170141183460469231731687303715884105727]::Array(Int128)) as x); +select '-'; +select runningDifference(x) from (select arrayJoin([0, 1, 5, 10, 170141183460469231731687303715884105728]::Array(Int256)) as x); select '--Date Difference--'; select runningDifference(x) from (select arrayJoin([Null, Null, toDate('1970-1-1'), toDate('1970-12-31'), Null, Null, toDate('2010-8-9')]) as x); select '-'; diff --git a/tests/queries/0_stateless/00718_format_datetime.sql b/tests/queries/0_stateless/00718_format_datetime.sql index 74ec03d83d3..3f8c927dfe7 100644 --- a/tests/queries/0_stateless/00718_format_datetime.sql +++ b/tests/queries/0_stateless/00718_format_datetime.sql @@ -1,14 +1,14 @@ SET send_logs_level = 'fatal'; -SELECT formatDateTime(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH (42) } -SELECT formatDateTime('not a datetime', 'IGNORED'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT (43) } -SELECT formatDateTime(now(), now()); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT (43) } -SELECT formatDateTime(now(), 'good format pattern', now()); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT (43) } -SELECT formatDateTime(now(), 'unescaped %'); -- { serverError BAD_ARGUMENTS (36) } -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%U'); -- { serverError NOT_IMPLEMENTED (48) } -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%v'); -- { serverError NOT_IMPLEMENTED (48) } -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%x'); -- { serverError NOT_IMPLEMENTED (48) } -SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%X'); -- { serverError NOT_IMPLEMENTED (48) } +SELECT formatDateTime(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT formatDateTime('not a datetime', 'IGNORED'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT formatDateTime(now(), now()); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT formatDateTime(now(), 'good format pattern', now()); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT formatDateTime(now(), 'unescaped %'); -- { serverError BAD_ARGUMENTS } +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%U'); -- { serverError NOT_IMPLEMENTED } +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%v'); -- { serverError NOT_IMPLEMENTED } +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%x'); -- { serverError NOT_IMPLEMENTED } +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%X'); -- { serverError NOT_IMPLEMENTED } SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%a'), formatDateTime(toDate32('2018-01-02'), '%a'); SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%b'), formatDateTime(toDate32('2018-01-02'), '%b'); diff --git a/tests/queries/0_stateless/00722_inner_join.reference b/tests/queries/0_stateless/00722_inner_join.reference index 86c07e6e84e..b5e8a77a20d 100644 --- a/tests/queries/0_stateless/00722_inner_join.reference +++ b/tests/queries/0_stateless/00722_inner_join.reference @@ -16,24 +16,24 @@ ┌─x──────┬─name─┐ │ system │ one │ └────────┴──────┘ -┌─database─┬─t.name─┐ -│ system │ one │ -└──────────┴────────┘ +┌─database─┬─name─┐ +│ system │ one │ +└──────────┴──────┘ ┌─db.x───┬─name─┐ │ system │ one │ └────────┴──────┘ 
-┌─db.name─┬─t.name─┐ -│ system │ one │ -└─────────┴────────┘ -┌─db.name─┬─t.name─┐ -│ system │ one │ -└─────────┴────────┘ -┌─t.database─┬─t.name─┐ -│ system │ one │ -└────────────┴────────┘ -┌─database─┬─t.name─┐ -│ system │ one │ -└──────────┴────────┘ +┌─db.name─┬─name─┐ +│ system │ one │ +└─────────┴──────┘ +┌─db.name─┬─name─┐ +│ system │ one │ +└─────────┴──────┘ +┌─database─┬─name─┐ +│ system │ one │ +└──────────┴──────┘ +┌─database─┬─name─┐ +│ system │ one │ +└──────────┴──────┘ 2 2 2 diff --git a/tests/queries/0_stateless/00722_inner_join.sql b/tests/queries/0_stateless/00722_inner_join.sql index 75ef40ff2b7..0d5a543b99d 100644 --- a/tests/queries/0_stateless/00722_inner_join.sql +++ b/tests/queries/0_stateless/00722_inner_join.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + SET allow_experimental_analyzer = 1; DROP TABLE IF EXISTS one; diff --git a/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference b/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference index 6bfe0db1448..43f48089b06 100644 --- a/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference +++ b/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference @@ -10,13 +10,13 @@ l \N \N String Nullable(String) \N \N \N \N using -l \N String Nullable(String) - \N String Nullable(String) -l \N String Nullable(String) +l \N Nullable(String) Nullable(String) +l \N Nullable(String) Nullable(String) +\N \N Nullable(String) Nullable(String) +\N \N Nullable(String) Nullable(String) +l \N Nullable(String) Nullable(String) +l \N Nullable(String) Nullable(String) \N \N Nullable(String) Nullable(String) -l \N String Nullable(String) - \N String Nullable(String) -l \N String Nullable(String) \N \N Nullable(String) Nullable(String) \N \N \N \N @@ -32,13 +32,13 @@ l \N \N Nullable(String) Nullable(String) \N \N \N \N using + join_use_nulls -l \N String Nullable(String) l \N Nullable(String) Nullable(String) -\N \N Nullable(String) Nullable(String) -\N \N Nullable(String) Nullable(String) -l \N String Nullable(String) l \N Nullable(String) Nullable(String) -\N \N Nullable(String) Nullable(String) -\N \N Nullable(String) Nullable(String) +r \N Nullable(String) Nullable(String) +r \N Nullable(String) Nullable(String) +l \N Nullable(String) Nullable(String) +l \N Nullable(String) Nullable(String) +r \N Nullable(String) Nullable(String) +r \N Nullable(String) Nullable(String) \N \N \N \N diff --git a/tests/queries/0_stateless/00848_join_use_nulls_segfault.sql b/tests/queries/0_stateless/00848_join_use_nulls_segfault.sql index 57eca0eb9e0..2f6cca0284c 100644 --- a/tests/queries/0_stateless/00848_join_use_nulls_segfault.sql +++ b/tests/queries/0_stateless/00848_join_use_nulls_segfault.sql @@ -1,4 +1,5 @@ SET any_join_distinct_right_table_keys = 1; +SET allow_experimental_analyzer = 1; DROP TABLE IF EXISTS t1_00848; DROP TABLE IF EXISTS t2_00848; @@ -53,16 +54,16 @@ SELECT t3.id = 'l', t3.not_id = 'l' FROM t1_00848 t1 LEFT JOIN t3_00848 t3 ON t1 SELECT 'using + join_use_nulls'; -SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1_00848 t1 ANY LEFT JOIN t3_00848 t3 USING(id) ORDER BY t1.id, t3.id; -SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1_00848 t1 ANY FULL JOIN t3_00848 t3 USING(id) ORDER BY t1.id, t3.id; -SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2_00848 t2 ANY FULL JOIN t3_00848 t3 USING(id) ORDER BY t2.id, t3.id; +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1_00848 t1 ANY LEFT JOIN t3_00848 t3 USING(id) ORDER BY id; +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM 
t1_00848 t1 ANY FULL JOIN t3_00848 t3 USING(id) ORDER BY id; +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2_00848 t2 ANY FULL JOIN t3_00848 t3 USING(id) ORDER BY id; -SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1_00848 t1 LEFT JOIN t3_00848 t3 USING(id) ORDER BY t1.id, t3.id; -SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1_00848 t1 FULL JOIN t3_00848 t3 USING(id) ORDER BY t1.id, t3.id; -SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2_00848 t2 FULL JOIN t3_00848 t3 USING(id) ORDER BY t2.id, t3.id; +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1_00848 t1 LEFT JOIN t3_00848 t3 USING(id) ORDER BY id; +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1_00848 t1 FULL JOIN t3_00848 t3 USING(id) ORDER BY id; +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2_00848 t2 FULL JOIN t3_00848 t3 USING(id) ORDER BY id; -SELECT t3.id = 'l', t3.not_id = 'l' FROM t1_00848 t1 ANY LEFT JOIN t3_00848 t3 USING(id) ORDER BY t1.id, t3.id; -SELECT t3.id = 'l', t3.not_id = 'l' FROM t1_00848 t1 LEFT JOIN t3_00848 t3 USING(id) ORDER BY t1.id, t3.id; +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1_00848 t1 ANY LEFT JOIN t3_00848 t3 USING(id) ORDER BY id; +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1_00848 t1 LEFT JOIN t3_00848 t3 USING(id) ORDER BY id; DROP TABLE t1_00848; DROP TABLE t2_00848; diff --git a/tests/queries/0_stateless/00853_join_with_nulls_crash.reference b/tests/queries/0_stateless/00853_join_with_nulls_crash.reference index 459b73acdbf..5df14d02d5e 100644 --- a/tests/queries/0_stateless/00853_join_with_nulls_crash.reference +++ b/tests/queries/0_stateless/00853_join_with_nulls_crash.reference @@ -15,5 +15,5 @@ bar bar 1 2 String Nullable(String) \N 0 1 String Nullable(String) foo 2 0 String bar 1 2 String -test 0 1 String + 0 1 String 0 1 String diff --git a/tests/queries/0_stateless/00853_join_with_nulls_crash.sql b/tests/queries/0_stateless/00853_join_with_nulls_crash.sql index c63c2d99cba..b620b8a7902 100644 --- a/tests/queries/0_stateless/00853_join_with_nulls_crash.sql +++ b/tests/queries/0_stateless/00853_join_with_nulls_crash.sql @@ -27,7 +27,7 @@ SELECT s1.other, s2.other, count_a, count_b, toTypeName(s1.other), toTypeName(s2 ( SELECT other, count() AS count_a FROM table_a GROUP BY other ) s1 ALL FULL JOIN ( SELECT other, count() AS count_b FROM table_b GROUP BY other ) s2 -USING other +ON s1.other = s2.other ORDER BY s2.other DESC, count_a, s1.other; SELECT s1.something, s2.something, count_a, count_b, toTypeName(s1.something), toTypeName(s2.something) FROM @@ -41,7 +41,7 @@ SELECT s1.something, s2.something, count_a, count_b, toTypeName(s1.something), t ( SELECT something, count() AS count_a FROM table_a GROUP BY something ) s1 ALL RIGHT JOIN ( SELECT something, count() AS count_b FROM table_b GROUP BY something ) s2 -USING (something) +ON s1.something = s2.something ORDER BY count_a DESC, s1.something, s2.something; SET joined_subquery_requires_alias = 0; @@ -50,7 +50,7 @@ SELECT something, count_a, count_b, toTypeName(something) FROM ( SELECT something, count() AS count_a FROM table_a GROUP BY something ) as s1 ALL FULL JOIN ( SELECT something, count() AS count_b FROM table_b GROUP BY something ) as s2 -USING (something) +ON s1.something = s2.something ORDER BY count_a DESC, something DESC; DROP TABLE table_a; diff --git a/tests/queries/0_stateless/00858_issue_4756.reference b/tests/queries/0_stateless/00858_issue_4756.reference index d00491fd7e5..e8183f05f5d 100644 --- a/tests/queries/0_stateless/00858_issue_4756.reference +++ 
b/tests/queries/0_stateless/00858_issue_4756.reference @@ -1 +1,3 @@ 1 +1 +1 diff --git a/tests/queries/0_stateless/00858_issue_4756.sql b/tests/queries/0_stateless/00858_issue_4756.sql index 3da0766c4e9..9eacd5ef364 100644 --- a/tests/queries/0_stateless/00858_issue_4756.sql +++ b/tests/queries/0_stateless/00858_issue_4756.sql @@ -1,3 +1,4 @@ +set allow_experimental_analyzer = 1; set distributed_product_mode = 'local'; drop table if exists shard1; @@ -21,7 +22,7 @@ where distr1.id in from distr1 join distr2 on distr1.id = distr2.id where distr1.id > 0 -); -- { serverError 288 } +); select distinct(d0.id) from distr1 d0 where d0.id in @@ -32,15 +33,14 @@ where d0.id in where d1.id > 0 ); --- TODO ---select distinct(distr1.id) from distr1 ---where distr1.id in ---( --- select distr1.id --- from distr1 as d1 --- join distr2 as d2 on distr1.id = distr2.id --- where distr1.id > 0 ---); +select distinct(distr1.id) from distr1 +where distr1.id in +( + select distr1.id + from distr1 as d1 + join distr2 as d2 on distr1.id = distr2.id + where distr1.id > 0 +); drop table shard1; drop table shard2; diff --git a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.reference b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.reference index f1839bae259..e142c6c79fe 100644 --- a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.reference +++ b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.reference @@ -1 +1,3 @@ +99999 +99999 0 0 13 diff --git a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh index 390d6a70ef1..7bf4a88e972 100755 --- a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh +++ b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh @@ -13,15 +13,24 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE small_table (a UInt64 default 0, n UInt $CLICKHOUSE_CLIENT --query="INSERT INTO small_table (n) SELECT * from system.numbers limit 100000;" $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE small_table FINAL;" -cached_query="SELECT count() FROM small_table where n > 0;" +cached_query="SELECT count() FROM small_table WHERE n > 0;" -$CLICKHOUSE_CLIENT --use_uncompressed_cache=1 --query="$cached_query" &> /dev/null - -$CLICKHOUSE_CLIENT --use_uncompressed_cache=1 --allow_prefetched_read_pool_for_remote_filesystem=0 --allow_prefetched_read_pool_for_local_filesystem=0 --query_id="test-query-uncompressed-cache" --query="$cached_query" &> /dev/null +$CLICKHOUSE_CLIENT --log_queries 1 --use_uncompressed_cache 1 --query="$cached_query" +$CLICKHOUSE_CLIENT --log_queries 1 --use_uncompressed_cache 1 --allow_prefetched_read_pool_for_remote_filesystem 0 --allow_prefetched_read_pool_for_local_filesystem 0 --query_id="test-query-uncompressed-cache" --query="$cached_query" $CLICKHOUSE_CLIENT --query="SYSTEM FLUSH LOGS" - -$CLICKHOUSE_CLIENT --query="SELECT ProfileEvents['Seek'], ProfileEvents['ReadCompressedBytes'], ProfileEvents['UncompressedCacheHits'] AS hit FROM system.query_log WHERE (query_id = 'test-query-uncompressed-cache') and current_database = currentDatabase() AND (type = 2) AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1" +$CLICKHOUSE_CLIENT --query=" + SELECT + ProfileEvents['Seek'], + ProfileEvents['ReadCompressedBytes'], + ProfileEvents['UncompressedCacheHits'] AS hit + FROM system.query_log + WHERE query_id = 'test-query-uncompressed-cache' + AND 
current_database = currentDatabase() + AND type = 2 + AND event_date >= yesterday() + ORDER BY event_time DESC + LIMIT 1" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS small_table" diff --git a/tests/queries/0_stateless/00975_values_list.sql b/tests/queries/0_stateless/00975_values_list.sql index 40c86898966..35afc99e93e 100644 --- a/tests/queries/0_stateless/00975_values_list.sql +++ b/tests/queries/0_stateless/00975_values_list.sql @@ -12,8 +12,8 @@ SELECT * FROM VALUES('n UInt64, s String, ss String', (1 + 22, '23', toString(23 SELECT * FROM VALUES('a Decimal(4, 4), b String, c String', (divide(toDecimal32(5, 3), 3), 'a', 'b')); -SELECT * FROM VALUES('x Float64', toUInt64(-1)); -- { serverError 69; } -SELECT * FROM VALUES('x Float64', NULL); -- { serverError 53; } +SELECT * FROM VALUES('x Float64', toUInt64(-1)); -- { serverError 69 } +SELECT * FROM VALUES('x Float64', NULL); -- { serverError 53 } SELECT * FROM VALUES('x Nullable(Float64)', NULL); DROP TABLE values_list; diff --git a/tests/queries/0_stateless/00988_expansion_aliases_limit.sql b/tests/queries/0_stateless/00988_expansion_aliases_limit.sql index 15c9f82da6f..e78ccf56093 100644 --- a/tests/queries/0_stateless/00988_expansion_aliases_limit.sql +++ b/tests/queries/0_stateless/00988_expansion_aliases_limit.sql @@ -1 +1,3 @@ -SELECT 1 AS a, a + a AS b, b + b AS c, c + c AS d, d + d AS e, e + e AS f, f + f AS g, g + g AS h, h + h AS i, i + i AS j, j + j AS k, k + k AS l, l + l AS m, m + m AS n, n + n AS o, o + o AS p, p + p AS q, q + q AS r, r + r AS s, s + s AS t, t + t AS u, u + u AS v, v + v AS w, w + w AS x, x + x AS y, y + y AS z; -- { serverError 168 } +SET allow_experimental_analyzer = 1; + +SELECT 1 AS a, a + a AS b, b + b AS c, c + c AS d, d + d AS e, e + e AS f, f + f AS g, g + g AS h, h + h AS i, i + i AS j, j + j AS k, k + k AS l, l + l AS m, m + m AS n, n + n AS o, o + o AS p, p + p AS q, q + q AS r, r + r AS s, s + s AS t, t + t AS u, u + u AS v, v + v AS w, w + w AS x, x + x AS y, y + y AS z; -- { serverError 36 } diff --git a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql index 7804ce32a5a..f9f30b44700 100644 --- a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql +++ b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql @@ -3,7 +3,10 @@ SET max_memory_usage = 32000000; SET join_on_disk_max_files_to_merge = 4; -SELECT number * 200000 as n, j FROM numbers(5) nums +SELECT n, j FROM +( + SELECT number * 200000 as n FROM numbers(5) +) nums ANY LEFT JOIN ( SELECT number * 2 AS n, number AS j FROM numbers(1000000) @@ -13,14 +16,20 @@ USING n; -- { serverError 241 } SET join_algorithm = 'partial_merge'; SET default_max_bytes_in_join = 0; -SELECT number * 200000 as n, j FROM numbers(5) nums +SELECT n, j FROM +( + SELECT number * 200000 as n FROM numbers(5) +) nums ANY LEFT JOIN ( SELECT number * 2 AS n, number AS j FROM numbers(1000000) ) js2 USING n; -- { serverError 12 } -SELECT number * 200000 as n, j FROM numbers(5) nums +SELECT n, j FROM +( + SELECT number * 200000 as n FROM numbers(5) +) nums ANY LEFT JOIN ( SELECT number * 2 AS n, number AS j FROM numbers(1000000) @@ -28,7 +37,10 @@ ANY LEFT JOIN ( USING n SETTINGS max_bytes_in_join = 30000000; -- { serverError 241 } -SELECT number * 200000 as n, j FROM numbers(5) nums +SELECT n, j FROM +( + SELECT number * 200000 as n FROM numbers(5) +) nums ANY LEFT JOIN ( SELECT number * 2 AS n, number AS j FROM numbers(1000000) @@ -39,7 +51,10 @@ SETTINGS 
max_bytes_in_join = 10000000; SET partial_merge_join_optimizations = 1; -SELECT number * 200000 as n, j FROM numbers(5) nums +SELECT n, j FROM +( + SELECT number * 200000 as n FROM numbers(5) +) nums LEFT JOIN ( SELECT number * 2 AS n, number AS j FROM numbers(1000000) @@ -50,7 +65,10 @@ SETTINGS max_rows_in_join = 100000; SET default_max_bytes_in_join = 10000000; -SELECT number * 200000 as n, j FROM numbers(5) nums +SELECT n, j FROM +( + SELECT number * 200000 as n FROM numbers(5) +) nums JOIN ( SELECT number * 2 AS n, number AS j FROM numbers(1000000) diff --git a/tests/queries/0_stateless/01018_ambiguous_column.reference b/tests/queries/0_stateless/01018_ambiguous_column.reference index a2a1d6ea4f6..308726fa184 100644 --- a/tests/queries/0_stateless/01018_ambiguous_column.reference +++ b/tests/queries/0_stateless/01018_ambiguous_column.reference @@ -1,12 +1,15 @@ 0 0 0 0 +0 0 0 0 0 0 -┌─one.dummy─┬─A.dummy─┬─B.dummy─┐ -│ 0 │ 0 │ 0 │ -└───────────┴─────────┴─────────┘ +0 +0 +┌─system.one.dummy─┬─A.dummy─┬─B.dummy─┐ +│ 0 │ 0 │ 0 │ +└──────────────────┴─────────┴─────────┘ ┌─A.dummy─┬─one.dummy─┬─two.dummy─┐ │ 0 │ 0 │ 0 │ └─────────┴───────────┴───────────┘ diff --git a/tests/queries/0_stateless/01018_ambiguous_column.sql b/tests/queries/0_stateless/01018_ambiguous_column.sql index 54603aab810..620bdb6ba3f 100644 --- a/tests/queries/0_stateless/01018_ambiguous_column.sql +++ b/tests/queries/0_stateless/01018_ambiguous_column.sql @@ -1,4 +1,6 @@ -select * from system.one cross join system.one; -- { serverError 352 } +SET allow_experimental_analyzer = 1; + +select * from system.one cross join system.one; select * from system.one cross join system.one r; select * from system.one l cross join system.one; select * from system.one left join system.one using dummy; @@ -8,10 +10,10 @@ USE system; SELECT dummy FROM one AS A JOIN one ON A.dummy = one.dummy; SELECT dummy FROM one JOIN one AS A ON A.dummy = one.dummy; -SELECT dummy FROM one l JOIN one r ON dummy = r.dummy; -- { serverError 352 } -SELECT dummy FROM one l JOIN one r ON l.dummy = dummy; -- { serverError 352 } -SELECT dummy FROM one l JOIN one r ON one.dummy = r.dummy; -- { serverError 352 } -SELECT dummy FROM one l JOIN one r ON l.dummy = one.dummy; -- { serverError 352 } +SELECT dummy FROM one l JOIN one r ON dummy = r.dummy; +SELECT dummy FROM one l JOIN one r ON l.dummy = dummy; -- { serverError 403 } +SELECT dummy FROM one l JOIN one r ON one.dummy = r.dummy; +SELECT dummy FROM one l JOIN one r ON l.dummy = one.dummy; -- { serverError 403 } SELECT * from one JOIN one A ON one.dummy = A.dummy diff --git a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.reference b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.reference index c89fe48d9f9..8d40aebacf2 100644 --- a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.reference +++ b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.reference @@ -1,5 +1,5 @@ 122 -Table dictdb_01041_01040.dict_invalidate doesn\'t exist +1 133 diff --git a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh index 7249d5e1a82..6856f952a47 100755 --- a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh +++ b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh @@ -53,7 +53,7 @@ function check_exception_detected() export -f 
check_exception_detected; timeout 30 bash -c check_exception_detected 2> /dev/null -$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1 | grep -Eo "Table dictdb_01041_01040.dict_invalidate .* exist" +$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1 | grep -Eo "dictdb_01041_01040.dict_invalidate.*UNKNOWN_TABLE" | wc -l $CLICKHOUSE_CLIENT --query " CREATE TABLE dictdb_01041_01040.dict_invalidate diff --git a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql index 2d9911287a3..bf1ac254783 100644 --- a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql +++ b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql @@ -1,5 +1,6 @@ -- Tags: no-parallel +SET allow_experimental_analyzer = 0; SET allow_experimental_window_view = 1; DROP DATABASE IF EXISTS test_01047; set allow_deprecated_database_ordinary=1; diff --git a/tests/queries/0_stateless/01048_window_view_parser.sql b/tests/queries/0_stateless/01048_window_view_parser.sql index 4c329f99f6e..f87d9aa023e 100644 --- a/tests/queries/0_stateless/01048_window_view_parser.sql +++ b/tests/queries/0_stateless/01048_window_view_parser.sql @@ -1,5 +1,6 @@ -- Tags: no-parallel +SET allow_experimental_analyzer = 0; SET allow_experimental_window_view = 1; DROP DATABASE IF EXISTS test_01048; set allow_deprecated_database_ordinary=1; diff --git a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql index d9604bb2b52..f49fbc251fd 100644 --- a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql +++ b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer = 0; SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01051_window_view_parser_hop.sql b/tests/queries/0_stateless/01051_window_view_parser_hop.sql index 472dc66f1a2..45877cf0647 100644 --- a/tests/queries/0_stateless/01051_window_view_parser_hop.sql +++ b/tests/queries/0_stateless/01051_window_view_parser_hop.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer = 0; SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh index 9fdc66191d7..e75b7d9570b 100755 --- a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh +++ b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh @@ -4,7 +4,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --multiquery <&1| grep -Fa "Exception: " | grep -Fv "REPLICA_ALREADY_EXISTS" | grep -Fiv "Will not try to start it up" | \ grep -Fv "Coordination::Exception" | grep -Fv "already contains some data and it does not look like Replicated database path" sleep 0.$RANDOM diff --git a/tests/queries/0_stateless/01211_optimize_skip_unused_shards_type_mismatch.sql b/tests/queries/0_stateless/01211_optimize_skip_unused_shards_type_mismatch.sql index 65adaf3ad71..de41132df62 100644 --- a/tests/queries/0_stateless/01211_optimize_skip_unused_shards_type_mismatch.sql +++ b/tests/queries/0_stateless/01211_optimize_skip_unused_shards_type_mismatch.sql @@ -9,7 +9,7 @@ create table data_02000 (key Int) Engine=Null(); create table dist_02000 as data_02000 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_02000, key); select * from data_02000 where key = 0xdeadbeafdeadbeaf; -select * from dist_02000 where key = 0xdeadbeafdeadbeaf settings force_optimize_skip_unused_shards=2; -- { serverError 507; } +select * from dist_02000 where key = 0xdeadbeafdeadbeaf settings force_optimize_skip_unused_shards=2; -- { serverError 507 } select * from dist_02000 where key = 0xdeadbeafdeadbeaf; drop table data_02000; diff --git a/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql b/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql index 513ecbd4ed4..be2f7b2a9bf 100644 --- a/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql +++ b/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql @@ -16,7 +16,7 @@ LAYOUT(FLAT()); SYSTEM RELOAD DICTIONARY dict_db_01225.dict; -DROP TABLE dict_db_01225.dict; -- { serverError 520; } +DROP TABLE dict_db_01225.dict; -- { serverError 520 } DROP DICTIONARY dict_db_01225.dict; DROP DATABASE dict_db_01225; diff --git a/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql b/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql index 09cde642ed2..bc733a0c546 100644 --- a/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql +++ b/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql @@ -18,7 +18,7 @@ LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); SHOW CREATE TABLE dict_db_01225_dictionary.`dict_db_01225.dict` FORMAT TSVRaw; -SHOW CREATE TABLE dict_db_01225_dictionary.`dict_db_01225.no_such_dict`; -- { serverError 487; } +SHOW CREATE TABLE dict_db_01225_dictionary.`dict_db_01225.no_such_dict`; -- { serverError 487 } DROP DATABASE dict_db_01225; DROP DATABASE dict_db_01225_dictionary; diff --git a/tests/queries/0_stateless/01231_log_queries_min_type.sql b/tests/queries/0_stateless/01231_log_queries_min_type.sql index c2470bb9a56..0ed5e3e605c 100644 --- a/tests/queries/0_stateless/01231_log_queries_min_type.sql +++ b/tests/queries/0_stateless/01231_log_queries_min_type.sql @@ -15,7 +15,7 @@ select count() from system.query_log where current_database = currentDatabase() set max_rows_to_read='100K'; set log_queries_min_type='EXCEPTION_WHILE_PROCESSING'; -select '01231_log_queries_min_type/EXCEPTION_WHILE_PROCESSING', max(number) from system.numbers limit 1e6; -- { serverError 158; } +select '01231_log_queries_min_type/EXCEPTION_WHILE_PROCESSING', max(number) from system.numbers limit 1e6; -- { serverError 158 } set max_rows_to_read=0; system flush logs; select count() from system.query_log where current_database = currentDatabase() @@ -23,7 +23,7 @@ select count() from system.query_log where current_database = currentDatabase() and event_date >= 
yesterday() and type = 'ExceptionWhileProcessing'; set max_rows_to_read='100K'; -select '01231_log_queries_min_type w/ Settings/EXCEPTION_WHILE_PROCESSING', max(number) from system.numbers limit 1e6; -- { serverError 158; } +select '01231_log_queries_min_type w/ Settings/EXCEPTION_WHILE_PROCESSING', max(number) from system.numbers limit 1e6; -- { serverError 158 } system flush logs; set max_rows_to_read=0; select count() from system.query_log where diff --git a/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh index 0e258bbbb09..08cc97c84bf 100755 --- a/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh +++ b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh @@ -76,7 +76,7 @@ insert into data_01278 select reinterpretAsString(number), // s6 reinterpretAsString(number), // s7 reinterpretAsString(number) // s8 -from numbers(100000); -- { serverError 241; }" > /dev/null 2>&1 +from numbers(100000); -- { serverError 241 }" > /dev/null 2>&1 local ret_code=$? if [[ $ret_code -eq 0 ]]; then diff --git a/tests/queries/0_stateless/01284_port.sql.j2 b/tests/queries/0_stateless/01284_port.sql.j2 index 6f78b3b8e3b..50e096c6deb 100644 --- a/tests/queries/0_stateless/01284_port.sql.j2 +++ b/tests/queries/0_stateless/01284_port.sql.j2 @@ -19,9 +19,9 @@ select port{{ suffix }}('http://127.0.0.1/', toUInt16(80)); select port{{ suffix }}('http://foobar.com/', toUInt16(80)); -- unsupported -/* ILLEGAL_TYPE_OF_ARGUMENT */ select port(toFixedString('', 1)); -- { serverError 43; } -/* ILLEGAL_TYPE_OF_ARGUMENT */ select port{{ suffix }}('', 1); -- { serverError 43; } -/* NUMBER_OF_ARGUMENTS_DOESNT_MATCH */ select port{{ suffix }}('', 1, 1); -- { serverError 42; } +/* ILLEGAL_TYPE_OF_ARGUMENT */ select port(toFixedString('', 1)); -- { serverError 43 } +/* ILLEGAL_TYPE_OF_ARGUMENT */ select port{{ suffix }}('', 1); -- { serverError 43 } +/* NUMBER_OF_ARGUMENTS_DOESNT_MATCH */ select port{{ suffix }}('', 1, 1); -- { serverError 42 } -- -- Known limitations of domain() (getURLHost()) diff --git a/tests/queries/0_stateless/01291_aggregation_in_order.reference b/tests/queries/0_stateless/01291_aggregation_in_order.reference index c072a8aed3e..cf058b9f2f5 100644 --- a/tests/queries/0_stateless/01291_aggregation_in_order.reference +++ b/tests/queries/0_stateless/01291_aggregation_in_order.reference @@ -22,8 +22,8 @@ 2 4 109 2 1 619 1 2 537 2 -1 619 1 -2 537 2 +-2 537 2 +-1 619 1 2019-05-05 00:00:00 -45363190 2019-05-05 00:00:00 -1249512288 2019-05-05 00:00:00 345522721 diff --git a/tests/queries/0_stateless/01291_aggregation_in_order.sql b/tests/queries/0_stateless/01291_aggregation_in_order.sql index c4357811520..e93eadc3329 100644 --- a/tests/queries/0_stateless/01291_aggregation_in_order.sql +++ b/tests/queries/0_stateless/01291_aggregation_in_order.sql @@ -14,7 +14,7 @@ SELECT a FROM pk_order GROUP BY a ORDER BY a; SELECT a, b, sum(c), avg(d) FROM pk_order GROUP BY a, b ORDER BY a, b; SELECT a, sum(c), avg(d) FROM pk_order GROUP BY a ORDER BY a; -SELECT a, sum(c), avg(d) FROM pk_order GROUP BY -a ORDER BY a; +SELECT -a, sum(c), avg(d) FROM pk_order GROUP BY -a ORDER BY -a; DROP TABLE IF EXISTS pk_order; @@ -27,7 +27,7 @@ INSERT INTO pk_order set max_block_size = 1; SELECT d, max(b) FROM pk_order GROUP BY d, a ORDER BY d, a LIMIT 5; -SELECT d, avg(a) FROM pk_order GROUP BY toString(d) ORDER BY toString(d) LIMIT 5; +SELECT 
toString(d), avg(a) FROM pk_order GROUP BY toString(d) ORDER BY toString(d) LIMIT 5; SELECT toStartOfHour(d) as d1, min(a), max(b) FROM pk_order GROUP BY d1 ORDER BY d1 LIMIT 5; DROP TABLE pk_order; diff --git a/tests/queries/0_stateless/01308_orc_output_format_arrays.reference b/tests/queries/0_stateless/01308_orc_output_format_arrays.reference index 1f9646ac112..7feea7cec35 100644 Binary files a/tests/queries/0_stateless/01308_orc_output_format_arrays.reference and b/tests/queries/0_stateless/01308_orc_output_format_arrays.reference differ diff --git a/tests/queries/0_stateless/01308_orc_output_format_arrays.sh b/tests/queries/0_stateless/01308_orc_output_format_arrays.sh index 1d9aea353b6..498854874cf 100755 --- a/tests/queries/0_stateless/01308_orc_output_format_arrays.sh +++ b/tests/queries/0_stateless/01308_orc_output_format_arrays.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE orc (array1 Array(Int32), array2 Array( $CLICKHOUSE_CLIENT --query="INSERT INTO orc VALUES ([1,2,3,4,5], [[1,2], [3,4], [5]]), ([42], [[42, 42], [42]])"; -$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC SETTINGS output_format_orc_compression_method='none'" | md5sum; $CLICKHOUSE_CLIENT --query="DROP TABLE orc"; diff --git a/tests/queries/0_stateless/01375_GROUP_BY_injective_elimination_dictGet_BAD_ARGUMENTS.sql b/tests/queries/0_stateless/01375_GROUP_BY_injective_elimination_dictGet_BAD_ARGUMENTS.sql index 88a2b25c2db..8ff9cd2b9f2 100644 --- a/tests/queries/0_stateless/01375_GROUP_BY_injective_elimination_dictGet_BAD_ARGUMENTS.sql +++ b/tests/queries/0_stateless/01375_GROUP_BY_injective_elimination_dictGet_BAD_ARGUMENTS.sql @@ -1 +1 @@ -SELECT dictGetString(concat('default', '.countryId'), 'country', toUInt64(number)) AS country FROM numbers(2) GROUP BY country; -- { serverError 36; } +SELECT dictGetString(concat('default', '.countryId'), 'country', toUInt64(number)) AS country FROM numbers(2) GROUP BY country; -- { serverError 36 } diff --git a/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql b/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql index 258d96829a5..29ffcb46fbf 100644 --- a/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql +++ b/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql @@ -1,7 +1,7 @@ -- Tags: no-parallel -- https://github.com/ClickHouse/ClickHouse/issues/11469 -SELECT dictGet('default.countryId', 'country', toUInt64(number)) AS country FROM numbers(2) GROUP BY country; -- { serverError 36; } +SELECT dictGet('default.countryId', 'country', toUInt64(number)) AS country FROM numbers(2) GROUP BY country; -- { serverError 36 } -- with real dictionary diff --git a/tests/queries/0_stateless/01402_cast_nullable_string_to_enum.sql b/tests/queries/0_stateless/01402_cast_nullable_string_to_enum.sql index 3b53e593095..b8b5370515a 100644 --- a/tests/queries/0_stateless/01402_cast_nullable_string_to_enum.sql +++ b/tests/queries/0_stateless/01402_cast_nullable_string_to_enum.sql @@ -5,9 +5,9 @@ SELECT CAST(CAST(NULL AS Nullable(String)) AS Nullable(Enum8('Hello' = 1))); SELECT CAST(CAST(NULL AS Nullable(FixedString(1))) AS Nullable(Enum8('Hello' = 1))); -- empty string still not acceptable -SELECT CAST(CAST('' AS Nullable(String)) AS Nullable(Enum8('Hello' = 1))); -- { serverError 36; } -SELECT CAST(CAST('' AS Nullable(FixedString(1))) AS Nullable(Enum8('Hello' = 1))); -- { serverError 36; } +SELECT CAST(CAST('' AS 
Nullable(String)) AS Nullable(Enum8('Hello' = 1))); -- { serverError 36 } +SELECT CAST(CAST('' AS Nullable(FixedString(1))) AS Nullable(Enum8('Hello' = 1))); -- { serverError 36 } -- non-Nullable Enum() still not acceptable -SELECT CAST(CAST(NULL AS Nullable(String)) AS Enum8('Hello' = 1)); -- { serverError 349; } -SELECT CAST(CAST(NULL AS Nullable(FixedString(1))) AS Enum8('Hello' = 1)); -- { serverError 349; } +SELECT CAST(CAST(NULL AS Nullable(String)) AS Enum8('Hello' = 1)); -- { serverError 349 } +SELECT CAST(CAST(NULL AS Nullable(FixedString(1))) AS Enum8('Hello' = 1)); -- { serverError 349 } diff --git a/tests/queries/0_stateless/01404_roundUpToPowerOfTwoOrZero_safety.sql b/tests/queries/0_stateless/01404_roundUpToPowerOfTwoOrZero_safety.sql index 4ee6e1fa5e4..d61a35c9999 100644 --- a/tests/queries/0_stateless/01404_roundUpToPowerOfTwoOrZero_safety.sql +++ b/tests/queries/0_stateless/01404_roundUpToPowerOfTwoOrZero_safety.sql @@ -1,4 +1,4 @@ -- repeat() with this length and this number of rows will allocation huge enough region (MSB set), -- which will cause roundUpToPowerOfTwoOrZero() returns 0 for such allocation (before the fix), -- and later repeat() will try to use this memory and will got SIGSEGV. -SELECT repeat('0.0001048576', number * (number * (number * 255))) FROM numbers(65535); -- { serverError 131; } +SELECT repeat('0.0001048576', number * (number * (number * 255))) FROM numbers(65535); -- { serverError 131 } diff --git a/tests/queries/0_stateless/01407_lambda_arrayJoin.sql b/tests/queries/0_stateless/01407_lambda_arrayJoin.sql index 363b1d92dbb..e1b8c1d5a76 100644 --- a/tests/queries/0_stateless/01407_lambda_arrayJoin.sql +++ b/tests/queries/0_stateless/01407_lambda_arrayJoin.sql @@ -1,5 +1,5 @@ SELECT arrayFilter((a) -> ((a, arrayJoin([])) IN (Null, [Null])), []); SELECT arrayFilter((a) -> ((a, arrayJoin([[]])) IN (Null, [Null])), []); -SELECT * FROM system.one ARRAY JOIN arrayFilter((a) -> ((a, arrayJoin([])) IN (NULL)), []) AS arr_x; -- { serverError 43; } +SELECT * FROM system.one ARRAY JOIN arrayFilter((a) -> ((a, arrayJoin([])) IN (NULL)), []) AS arr_x; -- { serverError 43 } SELECT * FROM numbers(1) LEFT ARRAY JOIN arrayFilter((x_0, x_1) -> (arrayJoin([]) IN (NULL)), [], []) AS arr_x; diff --git a/tests/queries/0_stateless/01408_range_overflow.sql b/tests/queries/0_stateless/01408_range_overflow.sql index 2107e8c3f36..d26507f8358 100644 --- a/tests/queries/0_stateless/01408_range_overflow.sql +++ b/tests/queries/0_stateless/01408_range_overflow.sql @@ -1,7 +1,7 @@ -- executeGeneric() SELECT range(1025, 1048576 + 9223372036854775807, 9223372036854775807); SELECT range(1025, 1048576 + (9223372036854775807 AS i), i); -SELECT range(1025, 18446744073709551615, 1); -- { serverError 69; } +SELECT range(1025, 18446744073709551615, 1); -- { serverError 69 } -- executeConstStep() SELECT range(number, 1048576 + 9223372036854775807, 9223372036854775807) FROM system.numbers LIMIT 1 OFFSET 1025; diff --git a/tests/queries/0_stateless/01429_join_on_error_messages.sql b/tests/queries/0_stateless/01429_join_on_error_messages.sql index 9b8688c8415..23aed83a66f 100644 --- a/tests/queries/0_stateless/01429_join_on_error_messages.sql +++ b/tests/queries/0_stateless/01429_join_on_error_messages.sql @@ -15,7 +15,7 @@ SET join_algorithm = 'partial_merge'; SELECT 1 FROM (select 1 a) A JOIN (select 1 b, 1 c) B ON a = b OR a = c; -- { serverError 48 } -- works for a = b OR a = b because of equivalent disjunct optimization -SET join_algorithm = 'auto'; +SET join_algorithm = 'grace_hash'; 
SELECT 1 FROM (select 1 a) A JOIN (select 1 b, 1 c) B ON a = b OR a = c; -- { serverError 48 } -- works for a = b OR a = b because of equivalent disjunct optimization diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.reference b/tests/queries/0_stateless/01455_opentelemetry_distributed.reference index d48b3738bc2..9343593e1ba 100644 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.reference +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.reference @@ -1,10 +1,10 @@ ===http=== -{"query":"select 1 from remote('127.0.0.2', system, one) format Null\n","status":"QueryFinish","tracestate":"some custom state","sorted_by_start_time":1} +{"query":"select 1 from remote('127.0.0.2', system, one) settings allow_experimental_analyzer = 1 format Null\n","status":"QueryFinish","tracestate":"some custom state","sorted_by_start_time":1} {"query":"DESC TABLE system.one","status":"QueryFinish","tracestate":"some custom state","sorted_by_start_time":1} -{"query":"SELECT 1 FROM `system`.`one`","status":"QueryFinish","tracestate":"some custom state","sorted_by_start_time":1} +{"query":"SELECT 1 AS `1` FROM `system`.`one`","status":"QueryFinish","tracestate":"some custom state","sorted_by_start_time":1} {"query":"DESC TABLE system.one","query_status":"QueryFinish","tracestate":"some custom state","sorted_by_finish_time":1} -{"query":"SELECT 1 FROM `system`.`one`","query_status":"QueryFinish","tracestate":"some custom state","sorted_by_finish_time":1} -{"query":"select 1 from remote('127.0.0.2', system, one) format Null\n","query_status":"QueryFinish","tracestate":"some custom state","sorted_by_finish_time":1} +{"query":"SELECT 1 AS `1` FROM `system`.`one`","query_status":"QueryFinish","tracestate":"some custom state","sorted_by_finish_time":1} +{"query":"select 1 from remote('127.0.0.2', system, one) settings allow_experimental_analyzer = 1 format Null\n","query_status":"QueryFinish","tracestate":"some custom state","sorted_by_finish_time":1} {"total spans":"3","unique spans":"3","unique non-zero parent spans":"3"} {"initial query spans with proper parent":"1"} {"unique non-empty tracestate values":"1"} @@ -12,10 +12,12 @@ {"query":"select * from url('http:\/\/127.0.0.2:8123\/?query=select%201%20format%20Null', CSV, 'a int')","status":"QueryFinish","tracestate":"another custom state","sorted_by_start_time":1} {"query":"select 1 format Null\n","status":"QueryFinish","tracestate":"another custom state","sorted_by_start_time":1} {"query":"select 1 format Null\n","status":"QueryFinish","tracestate":"another custom state","sorted_by_start_time":1} +{"query":"select 1 format Null\n","status":"QueryFinish","tracestate":"another custom state","sorted_by_start_time":1} +{"query":"select 1 format Null\n","query_status":"QueryFinish","tracestate":"another custom state","sorted_by_finish_time":1} {"query":"select 1 format Null\n","query_status":"QueryFinish","tracestate":"another custom state","sorted_by_finish_time":1} {"query":"select 1 format Null\n","query_status":"QueryFinish","tracestate":"another custom state","sorted_by_finish_time":1} {"query":"select * from url('http:\/\/127.0.0.2:8123\/?query=select%201%20format%20Null', CSV, 'a int')","query_status":"QueryFinish","tracestate":"another custom state","sorted_by_finish_time":1} -{"total spans":"3","unique spans":"3","unique non-zero parent spans":"3"} +{"total spans":"4","unique spans":"4","unique non-zero parent spans":"4"} {"initial query spans with proper parent":"1"} {"unique non-empty tracestate values":"1"} 
===sampled=== diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh index b2b5ae89105..0dfec6097db 100755 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -12,6 +12,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function check_log { ${CLICKHOUSE_CLIENT} --format=JSONEachRow -nq " +set allow_experimental_analyzer = 1; system flush logs; -- Show queries sorted by start time. @@ -55,7 +56,7 @@ select count(*) "'"'"initial query spans with proper parent"'"'" where trace_id = UUIDNumToString(toFixedString(unhex('$trace_id'), 16)) and operation_name = 'query' - and parent_span_id in ( + and parent_span_id in ( select span_id from system.opentelemetry_span_log where trace_id = UUIDNumToString(toFixedString(unhex('$trace_id'), 16)) and parent_span_id = reinterpretAsUInt64(unhex('73')) ) ; @@ -76,7 +77,7 @@ select uniqExact(value) "'"'"unique non-empty tracestate values"'"'" # Generate some random trace id so that the prevous runs of the test do not interfere. echo "===http===" -trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4()))))") +trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4())))) settings allow_experimental_analyzer = 1") # Check that the HTTP traceparent is read, and then passed through `remote` # table function. We expect 4 queries -- one initial, one SELECT and two @@ -86,7 +87,7 @@ ${CLICKHOUSE_CURL} \ --header "traceparent: 00-$trace_id-0000000000000073-01" \ --header "tracestate: some custom state" "$CLICKHOUSE_URL" \ --get \ - --data-urlencode "query=select 1 from remote('127.0.0.2', system, one) format Null" + --data-urlencode "query=select 1 from remote('127.0.0.2', system, one) settings allow_experimental_analyzer = 1 format Null" check_log diff --git a/tests/queries/0_stateless/01457_int256_hashing.sql b/tests/queries/0_stateless/01457_int256_hashing.sql index 861e567950a..510d25f6b8c 100644 --- a/tests/queries/0_stateless/01457_int256_hashing.sql +++ b/tests/queries/0_stateless/01457_int256_hashing.sql @@ -1,13 +1,15 @@ -- Tags: no-fasttest +SET joined_subquery_requires_alias = 0; + SELECT toUInt256(123) IN (NULL); SELECT toUInt256(123) AS k GROUP BY k; -SELECT toUInt256(123) AS k FROM system.one INNER JOIN (SELECT toUInt256(123) AS k) t USING k; +SELECT k FROM (SELECT toUInt256(123) AS k FROM system.one) INNER JOIN (SELECT toUInt256(123) AS k) t USING k; SELECT arrayEnumerateUniq([toUInt256(123), toUInt256(456), toUInt256(123)]); SELECT toInt256(123) IN (NULL); SELECT toInt256(123) AS k GROUP BY k; -SELECT toInt256(123) AS k FROM system.one INNER JOIN (SELECT toInt256(123) AS k) t USING k; +SELECT k FROM (SELECT toInt256(123) AS k FROM system.one) INNER JOIN (SELECT toInt256(123) AS k) t USING k; SELECT arrayEnumerateUniq([toInt256(123), toInt256(456), toInt256(123)]); -- SELECT toUInt128(123) IN (NULL); @@ -17,17 +19,17 @@ SELECT arrayEnumerateUniq([toInt256(123), toInt256(456), toInt256(123)]); SELECT toInt128(123) IN (NULL); SELECT toInt128(123) AS k GROUP BY k; -SELECT toInt128(123) AS k FROM system.one INNER JOIN (SELECT toInt128(123) AS k) t USING k; +SELECT k FROM (SELECT toInt128(123) AS k FROM system.one) INNER JOIN (SELECT toInt128(123) AS k) t USING k; SELECT arrayEnumerateUniq([toInt128(123), toInt128(456), toInt128(123)]); SELECT toNullable(toUInt256(321)) IN (NULL); SELECT 
toNullable(toUInt256(321)) AS k GROUP BY k; -SELECT toNullable(toUInt256(321)) AS k FROM system.one INNER JOIN (SELECT toUInt256(321) AS k) t USING k; +SELECT k FROM (SELECT toNullable(toUInt256(321)) AS k FROM system.one) INNER JOIN (SELECT toUInt256(321) AS k) t USING k; SELECT arrayEnumerateUniq([toNullable(toUInt256(321)), toNullable(toUInt256(456)), toNullable(toUInt256(321))]); SELECT toNullable(toInt256(321)) IN (NULL); SELECT toNullable(toInt256(321)) AS k GROUP BY k; -SELECT toNullable(toInt256(321)) AS k FROM system.one INNER JOIN (SELECT toInt256(321) AS k) t USING k; +SELECT k FROM (SELECT toNullable(toInt256(321)) AS k FROM system.one) INNER JOIN (SELECT toInt256(321) AS k) t USING k; SELECT arrayEnumerateUniq([toNullable(toInt256(321)), toNullable(toInt256(456)), toNullable(toInt256(321))]); -- SELECT toNullable(toUInt128(321)) IN (NULL); @@ -37,5 +39,5 @@ SELECT arrayEnumerateUniq([toNullable(toInt256(321)), toNullable(toInt256(456)), SELECT toNullable(toInt128(321)) IN (NULL); SELECT toNullable(toInt128(321)) AS k GROUP BY k; -SELECT toNullable(toInt128(321)) AS k FROM system.one INNER JOIN (SELECT toInt128(321) AS k) t USING k; +SELECT k FROM (SELECT toNullable(toInt128(321)) AS k FROM system.one) INNER JOIN (SELECT toInt128(321) AS k) t USING k; SELECT arrayEnumerateUniq([toNullable(toInt128(321)), toNullable(toInt128(456)), toNullable(toInt128(321))]); diff --git a/tests/queries/0_stateless/01470_columns_transformers.sql b/tests/queries/0_stateless/01470_columns_transformers.sql index 22c30ed36bf..8840ce3f3b5 100644 --- a/tests/queries/0_stateless/01470_columns_transformers.sql +++ b/tests/queries/0_stateless/01470_columns_transformers.sql @@ -1,5 +1,3 @@ -SET allow_experimental_analyzer = 1; - DROP TABLE IF EXISTS columns_transformers; CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) Engine=TinyLog; @@ -19,15 +17,15 @@ SELECT a.* APPLY(toDate) EXCEPT(i, j) APPLY(any) from columns_transformers a; SELECT * EXCEPT STRICT i from columns_transformers; SELECT * EXCEPT STRICT (i, j) from columns_transformers; SELECT * EXCEPT STRICT i, j1 from columns_transformers; -- { serverError 47 } -SELECT * EXCEPT STRICT(i, j1) from columns_transformers; -- { serverError 36 } +SELECT * EXCEPT STRICT(i, j1) from columns_transformers; -- { serverError NO_SUCH_COLUMN_IN_TABLE , BAD_ARGUMENTS } SELECT * REPLACE STRICT i + 1 AS i from columns_transformers; -SELECT * REPLACE STRICT(i + 1 AS col) from columns_transformers; -- { serverError 36 } +SELECT * REPLACE STRICT(i + 1 AS col) from columns_transformers; -- { serverError NO_SUCH_COLUMN_IN_TABLE, BAD_ARGUMENTS } SELECT * REPLACE(i + 1 AS i) APPLY(sum) from columns_transformers; SELECT columns_transformers.* REPLACE(j + 2 AS j, i + 1 AS i) APPLY(avg) from columns_transformers; SELECT columns_transformers.* REPLACE(j + 1 AS j, j + 2 AS j) APPLY(avg) from columns_transformers; -- { serverError 43 } -- REPLACE after APPLY will not match anything SELECT a.* APPLY(toDate) REPLACE(i + 1 AS i) APPLY(any) from columns_transformers a; -SELECT a.* APPLY(toDate) REPLACE STRICT(i + 1 AS i) APPLY(any) from columns_transformers a; -- { serverError 36 } +SELECT a.* APPLY(toDate) REPLACE STRICT(i + 1 AS i) APPLY(any) from columns_transformers a; -- { serverError NO_SUCH_COLUMN_IN_TABLE, BAD_ARGUMENTS } EXPLAIN SYNTAX SELECT * APPLY(sum) from columns_transformers; EXPLAIN SYNTAX SELECT columns_transformers.* APPLY(avg) from columns_transformers; diff --git a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql 
b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql index e4e2e3dd76a..e8643a4468c 100644 --- a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql +++ b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql @@ -7,16 +7,16 @@ insert into test1 values ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), set max_rows_to_read = 1; -- non-optimized -select count() from test1 settings max_parallel_replicas = 3; -- { serverError 158; } +select count() from test1 settings max_parallel_replicas = 3; -- { serverError 158 } -- optimized (toYear is monotonic and we provide the partition expr as is) select count() from test1 where toYear(toDate(p)) = 1999; -- non-optimized (toDate(DateTime) is always monotonic, but we cannot relaxing the predicates to do trivial count()) -select count() from test1 where p > toDateTime('2020-09-01 10:00:00'); -- { serverError 158; } +select count() from test1 where p > toDateTime('2020-09-01 10:00:00'); -- { serverError 158 } -- optimized (partition expr wrapped with non-monotonic functions) select count() FROM test1 where toDate(p) = '2020-09-01' and sipHash64(toString(toDate(p))) % 2 = 1; select count() FROM test1 where toDate(p) = '2020-09-01' and sipHash64(toString(toDate(p))) % 2 = 0; -- non-optimized (some predicate depends on non-partition_expr columns) -select count() FROM test1 where toDate(p) = '2020-09-01' and k = 2; -- { serverError 158; } +select count() FROM test1 where toDate(p) = '2020-09-01' and k = 2; -- { serverError 158 } -- optimized select count() from test1 where toDate(p) > '2020-09-01'; -- non-optimized @@ -35,10 +35,10 @@ select count() from test_tuple where i > 2; -- optimized select count() from test_tuple where i < 1; -- non-optimized -select count() from test_tuple array join [p,p] as c where toDate(p) = '2020-09-01'; -- { serverError 158; } +select count() from test_tuple array join [p,p] as c where toDate(p) = '2020-09-01'; -- { serverError 158 } select count() from test_tuple array join [1,2] as c where toDate(p) = '2020-09-01' settings max_rows_to_read = 4; -- non-optimized -select count() from test_tuple array join [1,2,3] as c where toDate(p) = '2020-09-01'; -- { serverError 158; } +select count() from test_tuple array join [1,2,3] as c where toDate(p) = '2020-09-01'; -- { serverError 158 } select count() from test_tuple array join [1,2,3] as c where toDate(p) = '2020-09-01' settings max_rows_to_read = 6; create table test_two_args(i int, j int, k int) engine MergeTree partition by i + j order by k settings index_granularity = 1; @@ -48,7 +48,7 @@ insert into test_two_args values (1, 2, 3), (2, 1, 3), (0, 3, 4); -- optimized select count() from test_two_args where i + j = 3; -- non-optimized -select count() from test_two_args where i = 1; -- { serverError 158; } +select count() from test_two_args where i = 1; -- { serverError 158 } drop table test1; drop table test_tuple; diff --git a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql index 228e4d73167..3d57518d0f4 100644 --- a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql +++ b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql @@ -13,9 +13,9 @@ set max_memory_usage='500M'; set max_threads=1; set max_block_size=500; -select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=0; 
-- { serverError 241; } +select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=0; -- { serverError 241 } select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=1; -- for WITH TOTALS previous groups should be kept. -select key, groupArray(repeat('a', 200)), count() from data_01513 group by key with totals format Null settings optimize_aggregation_in_order=1; -- { serverError 241; } +select key, groupArray(repeat('a', 200)), count() from data_01513 group by key with totals format Null settings optimize_aggregation_in_order=1; -- { serverError 241 } drop table data_01513; diff --git a/tests/queries/0_stateless/01516_create_table_primary_key.sql b/tests/queries/0_stateless/01516_create_table_primary_key.sql index b2b9f288eab..630c573c2cc 100644 --- a/tests/queries/0_stateless/01516_create_table_primary_key.sql +++ b/tests/queries/0_stateless/01516_create_table_primary_key.sql @@ -35,7 +35,7 @@ ATTACH TABLE primary_key_test(v1 Int32, v2 Int32) ENGINE=ReplacingMergeTree ORDE SELECT * FROM primary_key_test FINAL; DROP TABLE primary_key_test; -CREATE TABLE primary_key_test(v1 Int64, v2 Int32, v3 String, PRIMARY KEY(v1, gcd(v1, v2))) ENGINE=ReplacingMergeTree ORDER BY v1; -- { serverError 36; } +CREATE TABLE primary_key_test(v1 Int64, v2 Int32, v3 String, PRIMARY KEY(v1, gcd(v1, v2))) ENGINE=ReplacingMergeTree ORDER BY v1; -- { serverError 36 } CREATE TABLE primary_key_test(v1 Int64, v2 Int32, v3 String, PRIMARY KEY(v1, gcd(v1, v2))) ENGINE=ReplacingMergeTree ORDER BY (v1, gcd(v1, v2)); diff --git a/tests/queries/0_stateless/01528_allow_nondeterministic_optimize_skip_unused_shards.sql b/tests/queries/0_stateless/01528_allow_nondeterministic_optimize_skip_unused_shards.sql index 08fba7480d1..ac04178e585 100644 --- a/tests/queries/0_stateless/01528_allow_nondeterministic_optimize_skip_unused_shards.sql +++ b/tests/queries/0_stateless/01528_allow_nondeterministic_optimize_skip_unused_shards.sql @@ -5,7 +5,7 @@ create table dist_01528 as system.one engine=Distributed('test_cluster_two_shard set optimize_skip_unused_shards=1; set force_optimize_skip_unused_shards=1; -select * from dist_01528 where dummy = 2; -- { serverError 507; } +select * from dist_01528 where dummy = 2; -- { serverError 507 } select * from dist_01528 where dummy = 2 settings allow_nondeterministic_optimize_skip_unused_shards=1; drop table dist_01528; diff --git a/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql b/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql index 7a2e64742cf..13b4a4e331b 100644 --- a/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql +++ b/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql @@ -30,7 +30,7 @@ create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickho drop database db_01530_atomic; create database db_01530_atomic Engine=Atomic; -create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/{database}/db_01530_atomic/data', 'test') order by key; -- { serverError 253; } +create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/{database}/db_01530_atomic/data', 'test') order by key; -- { serverError 253 } set database_atomic_wait_for_drop_and_detach_synchronously=1; diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql index 
fea75e1439f..f19c77c68a3 100644 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql @@ -17,7 +17,7 @@ create table dist_01555 (key Int) Engine=Distributed(test_cluster_with_incorrect insert into dist_01555 values (1)(2); -- since test_cluster_with_incorrect_pw contains incorrect password ignore error -system flush distributed dist_01555; -- { serverError 516; } +system flush distributed dist_01555; -- { serverError 516 } select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1'), extract(last_exception, 'AUTHENTICATION_FAILED'), dateDiff('s', last_exception_time, now()) < 5 from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; drop table dist_01555; diff --git a/tests/queries/0_stateless/01556_explain_select_with_union_query.reference b/tests/queries/0_stateless/01556_explain_select_with_union_query.reference index c18e6b70b0d..27cf4c129b1 100644 --- a/tests/queries/0_stateless/01556_explain_select_with_union_query.reference +++ b/tests/queries/0_stateless/01556_explain_select_with_union_query.reference @@ -1,180 +1,180 @@ Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) 
ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) 
ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression 
((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) diff --git a/tests/queries/0_stateless/01556_explain_select_with_union_query.sql b/tests/queries/0_stateless/01556_explain_select_with_union_query.sql index dcd9bbe7347..bbd96ef5c69 100644 --- a/tests/queries/0_stateless/01556_explain_select_with_union_query.sql +++ b/tests/queries/0_stateless/01556_explain_select_with_union_query.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer = 1; SET union_default_mode = 'DISTINCT'; set enable_global_with_statement = 1; diff --git a/tests/queries/0_stateless/01561_clickhouse_client_stage.reference b/tests/queries/0_stateless/01561_clickhouse_client_stage.reference index 44c39f2a444..00e0f4ddb2e 100644 --- a/tests/queries/0_stateless/01561_clickhouse_client_stage.reference +++ b/tests/queries/0_stateless/01561_clickhouse_client_stage.reference @@ -1,15 +1,15 @@ -execute: default +execute: --allow_experimental_analyzer=1 "foo" 1 -execute: --stage 
fetch_columns -"dummy" +execute: --allow_experimental_analyzer=1 --stage fetch_columns +"system.one.dummy_0" 0 -execute: --stage with_mergeable_state -"1" +execute: --allow_experimental_analyzer=1 --stage with_mergeable_state +"1_UInt8" 1 -execute: --stage with_mergeable_state_after_aggregation -"1" +execute: --allow_experimental_analyzer=1 --stage with_mergeable_state_after_aggregation +"1_UInt8" 1 -execute: --stage complete +execute: --allow_experimental_analyzer=1 --stage complete "foo" 1 diff --git a/tests/queries/0_stateless/01561_clickhouse_client_stage.sh b/tests/queries/0_stateless/01561_clickhouse_client_stage.sh index 72161333812..99267458421 100755 --- a/tests/queries/0_stateless/01561_clickhouse_client_stage.sh +++ b/tests/queries/0_stateless/01561_clickhouse_client_stage.sh @@ -5,6 +5,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +opts=( + "--allow_experimental_analyzer=1" +) + function execute_query() { if [ $# -eq 0 ]; then @@ -15,8 +19,8 @@ function execute_query() ${CLICKHOUSE_CLIENT} "$@" --format CSVWithNames -q "SELECT 1 AS foo" } -execute_query # default -- complete -execute_query --stage fetch_columns -execute_query --stage with_mergeable_state -execute_query --stage with_mergeable_state_after_aggregation -execute_query --stage complete +execute_query "${opts[@]}" # default -- complete +execute_query "${opts[@]}" --stage fetch_columns +execute_query "${opts[@]}" --stage with_mergeable_state +execute_query "${opts[@]}" --stage with_mergeable_state_after_aggregation +execute_query "${opts[@]}" --stage complete diff --git a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference index 8c8bb73b801..49b4d22ea71 100644 --- a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference @@ -4,10 +4,10 @@ SELECT FROM test_order_by ORDER BY timestamp ASC LIMIT 10 -Expression (Projection) +Expression (Project names) Limit (preliminary LIMIT (without OFFSET)) Sorting (Sorting for ORDER BY) - Expression (Before ORDER BY) + Expression ((Before ORDER BY + (Projection + Change column names to column identifiers))) ReadFromMergeTree (default.test_order_by) SELECT timestamp, @@ -15,10 +15,10 @@ SELECT FROM test_order_by ORDER BY toDate(timestamp) ASC LIMIT 10 -Expression (Projection) +Expression (Project names) Limit (preliminary LIMIT (without OFFSET)) Sorting (Sorting for ORDER BY) - Expression (Before ORDER BY) + Expression ((Before ORDER BY + (Projection + Change column names to column identifiers))) ReadFromMergeTree (default.test_order_by) SELECT timestamp, @@ -28,10 +28,10 @@ ORDER BY toDate(timestamp) ASC, timestamp ASC LIMIT 10 -Expression (Projection) +Expression (Project names) Limit (preliminary LIMIT (without OFFSET)) Sorting (Sorting for ORDER BY) - Expression (Before ORDER BY) + Expression ((Before ORDER BY + (Projection + Change column names to column identifiers))) ReadFromMergeTree (default.test_order_by) SELECT timestamp, diff --git a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.sql b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.sql index 15ddb5a848f..2fe24d1662d 100644 --- a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.sql +++ 
b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer = 1; SET optimize_monotonous_functions_in_order_by = 1; SET optimize_read_in_order = 1; diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index aaa88d66ca0..b981a46b4fd 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -915,12 +915,12 @@ from (select number, intDiv(number, 3) p, mod(number, 5) o from numbers(16)) t ; -Expression ((Projection + Before ORDER BY)) +Expression ((Project names + Projection)) Window (Window step for window \'\') - Window (Window step for window \'PARTITION BY p\') - Window (Window step for window \'PARTITION BY p ORDER BY o ASC\') - Sorting (Sorting for window \'PARTITION BY p ORDER BY o ASC\') - Expression ((Before window functions + (Projection + Before ORDER BY))) + Window (Window step for window \'PARTITION BY t.p_0\') + Window (Window step for window \'PARTITION BY t.p_0 ORDER BY t.o_1 ASC\') + Sorting (Sorting for window \'PARTITION BY t.p_0 ORDER BY t.o_1 ASC\') + Expression ((Before WINDOW + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) ReadFromStorage (SystemNumbers) explain select count(*) over (order by o, number), @@ -929,13 +929,13 @@ from (select number, intDiv(number, 3) p, mod(number, 5) o from numbers(16)) t ; -Expression ((Projection + Before ORDER BY)) - Window (Window step for window \'ORDER BY o ASC, number ASC\') - Sorting (Sorting for window \'ORDER BY o ASC, number ASC\') - Window (Window step for window \'ORDER BY number ASC\') - Expression ((Before window functions + (Projection + Before ORDER BY)) [lifted up part]) - Sorting (Sorting for window \'ORDER BY number ASC\') - Expression ((Before window functions + (Projection + Before ORDER BY))) +Expression ((Project names + Projection)) + Window (Window step for window \'ORDER BY t.o_0 ASC, t.number_1 ASC\') + Sorting (Sorting for window \'ORDER BY t.o_0 ASC, t.number_1 ASC\') + Window (Window step for window \'ORDER BY t.number_1 ASC\') + Expression ((Before WINDOW + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))) [lifted up part]) + Sorting (Sorting for window \'ORDER BY t.number_1 ASC\') + Expression ((Before WINDOW + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) ReadFromStorage (SystemNumbers) -- A test case for the sort comparator found by fuzzer. 
SELECT diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index 3f4a028eac2..3c9c1f9cea7 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -1,5 +1,7 @@ -- Tags: long +SET allow_experimental_analyzer = 1; + -- { echo } -- just something basic diff --git a/tests/queries/0_stateless/01592_toUnixTimestamp_Date.sql b/tests/queries/0_stateless/01592_toUnixTimestamp_Date.sql index 5dc87e31f75..e8411484d71 100644 --- a/tests/queries/0_stateless/01592_toUnixTimestamp_Date.sql +++ b/tests/queries/0_stateless/01592_toUnixTimestamp_Date.sql @@ -1 +1 @@ -select toUnixTimestamp(today()); -- { serverError 44; } +select toUnixTimestamp(today()); -- { serverError 44 } diff --git a/tests/queries/0_stateless/01595_countMatches.sql b/tests/queries/0_stateless/01595_countMatches.sql index 6374fe7bc5b..0b170945d44 100644 --- a/tests/queries/0_stateless/01595_countMatches.sql +++ b/tests/queries/0_stateless/01595_countMatches.sql @@ -25,5 +25,5 @@ select countMatchesCaseInsensitive('foo.com BAR.COM baz.com bam.com', '([^. ]+)\ select countMatchesCaseInsensitive('foo.com@foo.com bar.com@foo.com BAZ.com@foo.com bam.com@foo.com', '([^. ]+)\.([^. ]+)@([^. ]+)\.([^. ]+)'); select 'errors'; -select countMatches(1, 'foo') from numbers(1); -- { serverError 43; } -select countMatches('foobarfoo', toString(number)) from numbers(1); -- { serverError 44; } +select countMatches(1, 'foo') from numbers(1); -- { serverError 43 } +select countMatches('foobarfoo', toString(number)) from numbers(1); -- { serverError 44 } diff --git a/tests/queries/0_stateless/01596_full_join_chertus.sql b/tests/queries/0_stateless/01596_full_join_chertus.sql index 162b9f7be8f..32911abb792 100644 --- a/tests/queries/0_stateless/01596_full_join_chertus.sql +++ b/tests/queries/0_stateless/01596_full_join_chertus.sql @@ -1,9 +1,9 @@ select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 full join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 -using k order by js1.k, js2.k; +ON js1.k = js2.k order by js1.k, js2.k; select toTypeName(js1.k), toTypeName(js2.k), toTypeName(js1.s), toTypeName(js2.s) from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 full join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 -using k order by js1.k, js2.k; +ON js1.k = js2.k order by js1.k, js2.k; diff --git a/tests/queries/0_stateless/01709_inactive_parts_to_throw_insert.sql b/tests/queries/0_stateless/01709_inactive_parts_to_throw_insert.sql index 6de0d4f4e0c..2bb92aec713 100644 --- a/tests/queries/0_stateless/01709_inactive_parts_to_throw_insert.sql +++ b/tests/queries/0_stateless/01709_inactive_parts_to_throw_insert.sql @@ -7,6 +7,6 @@ insert into data_01709 values (2); optimize table data_01709 final; -insert into data_01709 values (3); -- { serverError 252; } +insert into data_01709 values (3); -- { serverError 252 } drop table data_01709; diff --git a/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql b/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql index 734aa659146..5169c667b81 100644 --- a/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql +++ 
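The hunks above normalize the error hints: the stateless-test harness reads a "-- { serverError N }" comment after a query as "this query must fail with code N", and the older variant with a trailing semicolon inside the braces is dropped. A small sketch of the current form, written from bash so it stays self-contained (the test file name is made up for illustration):

#!/usr/bin/env bash
# Write a tiny test file using the current error-hint style, then show it.
cat > 99999_error_hint_example.sql <<'SQL'
select toInt64('++1');                          -- { serverError 72 }
select countMatches(1, 'foo') from numbers(1);  -- { serverError 43 }
select toInt64('1');
SQL
cat 99999_error_hint_example.sql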
b/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql @@ -4,6 +4,6 @@ create table t (x UInt32) engine = MergeTree order by tuple() settings index_gra insert into t select number from numbers(100); alter table t add projection p (select uniqHLL12(x)); insert into t select number + 100 from numbers(100); -select uniqHLL12(x) from t settings allow_experimental_projection_optimization = 1, max_bytes_to_read=400, max_block_size=8; -- { serverError 307; } +select uniqHLL12(x) from t settings allow_experimental_projection_optimization = 1, max_bytes_to_read=400, max_block_size=8; -- { serverError 307 } drop table if exists t; diff --git a/tests/queries/0_stateless/01823_explain_json.reference b/tests/queries/0_stateless/01823_explain_json.reference index 9df7c16e4f4..befbf82f4fb 100644 --- a/tests/queries/0_stateless/01823_explain_json.reference +++ b/tests/queries/0_stateless/01823_explain_json.reference @@ -37,63 +37,59 @@ "Node Type": "Aggregating", "Header": [ { - "Name": "number", + "Name": "number_0", "Type": "UInt64" }, { - "Name": "plus(number, 1)", - "Type": "UInt64" - }, - { - "Name": "quantile(0.2)(number)", + "Name": "quantile(0.2_Float64)(number_0)", "Type": "Float64" }, { - "Name": "sumIf(number, greater(number, 0))", + "Name": "sumIf(number_0, greater(number_0, 0_UInt8))", "Type": "UInt64" } ], - "Keys": ["number", "plus(number, 1)"], + "Keys": ["number_0"], "Aggregates": [ { - "Name": "quantile(0.2)(number)", + "Name": "quantile(0.2_Float64)(number_0)", "Function": { "Name": "quantile", "Parameters": ["0.2"], "Argument Types": ["UInt64"], "Result Type": "Float64" }, - "Arguments": ["number"] + "Arguments": ["number_0"] }, { - "Name": "sumIf(number, greater(number, 0))", + "Name": "sumIf(number_0, greater(number_0, 0_UInt8))", "Function": { "Name": "sumIf", "Argument Types": ["UInt64", "UInt8"], "Result Type": "UInt64" }, - "Arguments": ["number", "greater(number, 0)"] + "Arguments": ["number_0", "greater(number_0, 0_UInt8)"] } ], -------- "Node Type": "ArrayJoin", "Left": false, - "Columns": ["x", "y"], + "Columns": ["x_0", "y_1"], -------- "Node Type": "Distinct", - "Columns": ["intDiv(number, 3)", "intDiv(number, 2)"], + "Columns": ["intDiv(number_0, 2_UInt8)", "intDiv(number_0, 3_UInt8)"], -- "Node Type": "Distinct", - "Columns": ["intDiv(number, 3)", "intDiv(number, 2)"], + "Columns": ["intDiv(number_0, 2_UInt8)", "intDiv(number_0, 3_UInt8)"], -------- "Sort Description": [ { - "Column": "number", + "Column": "number_0", "Ascending": false, "With Fill": false }, { - "Column": "plus(number, 1)", + "Column": "plus(number_0, 1_UInt8)", "Ascending": true, "With Fill": false } diff --git a/tests/queries/0_stateless/01823_explain_json.sh b/tests/queries/0_stateless/01823_explain_json.sh index 7868bc0cc78..39128773069 100755 --- a/tests/queries/0_stateless/01823_explain_json.sh +++ b/tests/queries/0_stateless/01823_explain_json.sh @@ -5,26 +5,29 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "EXPLAIN json = 1, description = 0 SELECT 1 UNION ALL SELECT 2 FORMAT TSVRaw" +opts=( + "--allow_experimental_analyzer=1" +) +$CLICKHOUSE_CLIENT "${opts[@]}" -q "EXPLAIN json = 1, description = 0 SELECT 1 UNION ALL SELECT 2 FORMAT TSVRaw" echo "--------" -$CLICKHOUSE_CLIENT -q "explain json = 1, description = 0, header = 1 select 1, 2 + dummy FORMAT TSVRaw" 2> /dev/null | grep Header -m 1 -A 8 +$CLICKHOUSE_CLIENT "${opts[@]}" -q "explain json = 1, description = 0, header = 1 select 1, 2 + dummy FORMAT TSVRaw" 2> /dev/null | grep Header -m 1 -A 8 echo "--------" -$CLICKHOUSE_CLIENT -q "EXPLAIN json = 1, actions = 1, header = 1, description = 0 +$CLICKHOUSE_CLIENT "${opts[@]}" -q "EXPLAIN json = 1, actions = 1, header = 1, description = 0 SELECT quantile(0.2)(number), sumIf(number, number > 0) from numbers(2) group by number, number + 1 FORMAT TSVRaw - " | grep Aggregating -A 40 + " | grep Aggregating -A 36 echo "--------" -$CLICKHOUSE_CLIENT -q "EXPLAIN json = 1, actions = 1, description = 0 +$CLICKHOUSE_CLIENT "${opts[@]}" -q "EXPLAIN json = 1, actions = 1, description = 0 SELECT x, y from numbers(2) array join [number, 1] as x, [number + 1] as y FORMAT TSVRaw " | grep ArrayJoin -A 2 echo "--------" -$CLICKHOUSE_CLIENT -q "EXPLAIN json = 1, actions = 1, description = 0 +$CLICKHOUSE_CLIENT "${opts[@]}" -q "EXPLAIN json = 1, actions = 1, description = 0 SELECT distinct intDiv(number, 2), intDiv(number, 3) from numbers(10) FORMAT TSVRaw " | grep Distinct -A 1 echo "--------" -$CLICKHOUSE_CLIENT -q "EXPLAIN json = 1, actions = 1, description = 0 +$CLICKHOUSE_CLIENT "${opts[@]}" -q "EXPLAIN json = 1, actions = 1, description = 0 SELECT number + 1 from numbers(10) order by number desc, number + 1 limit 3 FORMAT TSVRaw " | grep "Sort Description" -A 12 diff --git a/tests/queries/0_stateless/01883_with_grouping_sets.reference b/tests/queries/0_stateless/01883_with_grouping_sets.reference index 8fae10a05a4..499e930541f 100644 --- a/tests/queries/0_stateless/01883_with_grouping_sets.reference +++ b/tests/queries/0_stateless/01883_with_grouping_sets.reference @@ -13,8 +13,7 @@ ExpressionTransform Copy 1 → 2 (Expression) ExpressionTransform - (ReadFromStorage) - Memory 0 → 1 + (ReadFromMemoryStorage) 1 0 1 4500 1 0 3 4700 1 0 5 4900 diff --git a/tests/queries/0_stateless/01888_read_int_safe.sql b/tests/queries/0_stateless/01888_read_int_safe.sql index 3aea8e38ab0..197338775c4 100644 --- a/tests/queries/0_stateless/01888_read_int_safe.sql +++ b/tests/queries/0_stateless/01888_read_int_safe.sql @@ -1,10 +1,10 @@ -select toInt64('--1'); -- { serverError 72; } -select toInt64('+-1'); -- { serverError 72; } -select toInt64('++1'); -- { serverError 72; } -select toInt64('++'); -- { serverError 72; } -select toInt64('+'); -- { serverError 72; } -select toInt64('1+1'); -- { serverError 6; } -select toInt64('1-1'); -- { serverError 6; } -select toInt64(''); -- { serverError 32; } +select toInt64('--1'); -- { serverError 72 } +select toInt64('+-1'); -- { serverError 72 } +select toInt64('++1'); -- { serverError 72 } +select toInt64('++'); -- { serverError 72 } +select toInt64('+'); -- { serverError 72 } +select toInt64('1+1'); -- { serverError 6 } +select toInt64('1-1'); -- { serverError 6 } +select toInt64(''); -- { serverError 32 } select toInt64('1'); select toInt64('-1'); diff --git a/tests/queries/0_stateless/01911_logical_error_minus.sql b/tests/queries/0_stateless/01911_logical_error_minus.sql index 3dcdedd38f5..7f371a463f8 100644 --- 
a/tests/queries/0_stateless/01911_logical_error_minus.sql +++ b/tests/queries/0_stateless/01911_logical_error_minus.sql @@ -26,7 +26,7 @@ INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32 INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32) SELECT number AS n, 'sin(n*n*n)*n', sin(n * n * n * n* n) AS v, v, v, v FROM system.numbers LIMIT 301, 100; -SELECT IF(-2, NULL, 0.00009999999747378752), IF(1048577, 1048576, NULL), c1.key, IF(1, NULL, NULL), c2.key FROM codecTest AS c1 , codecTest AS c2 WHERE ignore(IF(257, -2, NULL), arrayJoin([65537]), IF(3, 1024, 9223372036854775807)) AND IF(NULL, 256, NULL) AND (IF(NULL, '1048576', NULL) = (c1.key - NULL)) LIMIT 65535; +SELECT IF(2, NULL, 0.00009999999747378752), IF(104, 1048576, NULL), c1.key, IF(1, NULL, NULL), c2.key FROM codecTest AS c1 , codecTest AS c2 WHERE ignore(IF(255, -2, NULL), arrayJoin([65537]), IF(3, 1024, 9223372036854775807)) AND IF(NULL, 256, NULL) AND (IF(NULL, '1048576', NULL) = (c1.key - NULL)) LIMIT 65535; SELECT c1.key, c1.name, c1.ref_valueF64, c1.valueF64, c1.ref_valueF64 - c1.valueF64 AS dF64, '', c2.key, c2.ref_valueF64 FROM codecTest AS c1 , codecTest AS c2 WHERE (dF64 != 3) AND c1.valueF64 != 0 AND (c2.key = (c1.key - 1048576)) LIMIT 0; @@ -72,7 +72,7 @@ INSERT INTO codecTest (key, ref_valueU64, valueU64, ref_valueU32, valueU32, ref_ SELECT number as n, n + (rand64() - 9223372036854775807)/1000 as v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, toDateTime(v), toDateTime(v), toDate(v), toDate(v) FROM system.numbers LIMIT 3001, 1000; -SELECT IF(-2, NULL, 0.00009999999747378752), IF(1048577, 1048576, NULL), c1.key, IF(1, NULL, NULL), c2.key FROM codecTest AS c1 , codecTest AS c2 WHERE ignore(IF(257, -2, NULL), arrayJoin([65537]), IF(3, 1024, 9223372036854775807)) AND IF(NULL, 256, NULL) AND (IF(NULL, '1048576', NULL) = (c1.key - NULL)) LIMIT 65535; +SELECT IF(2, NULL, 0.00009999999747378752), IF(104, 1048576, NULL), c1.key, IF(1, NULL, NULL), c2.key FROM codecTest AS c1 , codecTest AS c2 WHERE ignore(IF(255, -2, NULL), arrayJoin([65537]), IF(3, 1024, 9223372036854775807)) AND IF(NULL, 256, NULL) AND (IF(NULL, '1048576', NULL) = (c1.key - NULL)) LIMIT 65535; DROP TABLE codecTest; diff --git a/tests/queries/0_stateless/01913_names_of_tuple_literal.sql b/tests/queries/0_stateless/01913_names_of_tuple_literal.sql index 09de9e8cf37..879f4c91587 100644 --- a/tests/queries/0_stateless/01913_names_of_tuple_literal.sql +++ b/tests/queries/0_stateless/01913_names_of_tuple_literal.sql @@ -1,2 +1,4 @@ +SET allow_experimental_analyzer = 0; + SELECT ((1, 2), (2, 3), (3, 4)) FORMAT TSVWithNames; SELECT ((1, 2), (2, 3), (3, 4)) FORMAT TSVWithNames SETTINGS legacy_column_name_of_tuple_literal = 1; diff --git a/tests/queries/0_stateless/02008_tuple_to_name_value_pairs.sql b/tests/queries/0_stateless/02008_tuple_to_name_value_pairs.sql index 59987a86590..1f6026bb61e 100644 --- a/tests/queries/0_stateless/02008_tuple_to_name_value_pairs.sql +++ b/tests/queries/0_stateless/02008_tuple_to_name_value_pairs.sql @@ -19,7 +19,7 @@ INSERT INTO test02008 VALUES (tuple(3.3, 5.5, 6.6)); SELECT untuple(arrayJoin(tupleToNameValuePairs(col))) from test02008; DROP TABLE IF EXISTS test02008; -SELECT tupleToNameValuePairs(tuple(1, 1.3)); -- { serverError 43; } -SELECT tupleToNameValuePairs(tuple(1, [1,2])); -- { serverError 43; } -SELECT tupleToNameValuePairs(tuple(1, 'a')); -- { serverError 43; } -SELECT tupleToNameValuePairs(33); -- { serverError 43; } +SELECT tupleToNameValuePairs(tuple(1, 1.3)); 
-- { serverError 43 } +SELECT tupleToNameValuePairs(tuple(1, [1,2])); -- { serverError 43 } +SELECT tupleToNameValuePairs(tuple(1, 'a')); -- { serverError 43 } +SELECT tupleToNameValuePairs(33); -- { serverError 43 } diff --git a/tests/queries/0_stateless/02048_clickhouse_local_stage.reference b/tests/queries/0_stateless/02048_clickhouse_local_stage.reference index 44c39f2a444..00e0f4ddb2e 100644 --- a/tests/queries/0_stateless/02048_clickhouse_local_stage.reference +++ b/tests/queries/0_stateless/02048_clickhouse_local_stage.reference @@ -1,15 +1,15 @@ -execute: default +execute: --allow_experimental_analyzer=1 "foo" 1 -execute: --stage fetch_columns -"dummy" +execute: --allow_experimental_analyzer=1 --stage fetch_columns +"system.one.dummy_0" 0 -execute: --stage with_mergeable_state -"1" +execute: --allow_experimental_analyzer=1 --stage with_mergeable_state +"1_UInt8" 1 -execute: --stage with_mergeable_state_after_aggregation -"1" +execute: --allow_experimental_analyzer=1 --stage with_mergeable_state_after_aggregation +"1_UInt8" 1 -execute: --stage complete +execute: --allow_experimental_analyzer=1 --stage complete "foo" 1 diff --git a/tests/queries/0_stateless/02048_clickhouse_local_stage.sh b/tests/queries/0_stateless/02048_clickhouse_local_stage.sh index 5c1303b5160..182acc23a13 100755 --- a/tests/queries/0_stateless/02048_clickhouse_local_stage.sh +++ b/tests/queries/0_stateless/02048_clickhouse_local_stage.sh @@ -5,6 +5,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +opts=( + "--allow_experimental_analyzer=1" +) + function execute_query() { if [ $# -eq 0 ]; then @@ -15,8 +19,8 @@ function execute_query() ${CLICKHOUSE_LOCAL} "$@" --format CSVWithNames -q "SELECT 1 AS foo" } -execute_query # default -- complete -execute_query --stage fetch_columns -execute_query --stage with_mergeable_state -execute_query --stage with_mergeable_state_after_aggregation -execute_query --stage complete +execute_query "${opts[@]}" # default -- complete +execute_query "${opts[@]}" --stage fetch_columns +execute_query "${opts[@]}" --stage with_mergeable_state +execute_query "${opts[@]}" --stage with_mergeable_state_after_aggregation +execute_query "${opts[@]}" --stage complete diff --git a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql index 4aad7ae3694..822ffb19764 100644 --- a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql +++ b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql @@ -6,9 +6,9 @@ insert into test values (0); select if(0, y, 42) from test; select if(1, 42, y) from test; select if(toUInt8(0), y, 42) from test; -select if(toInt8(0), y, 42) from test; +select if(toUInt8(0), y, 42) from test; +select if(toUInt8(1), 42, y) from test; select if(toUInt8(1), 42, y) from test; -select if(toInt8(1), 42, y) from test; select if(toUInt8(toUInt8(0)), y, 42) from test; select if(cast(cast(0, 'UInt8'), 'UInt8'), y, 42) from test; explain syntax select x, if((select hasColumnInTable(currentDatabase(), 'test', 'y')), y, x || '_') from test; diff --git a/tests/queries/0_stateless/02125_query_views_log_window_function.sql b/tests/queries/0_stateless/02125_query_views_log_window_function.sql index 1de2cc95b14..fff1e943c58 100644 --- a/tests/queries/0_stateless/02125_query_views_log_window_function.sql +++ 
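The --stage flag used in 02048_clickhouse_local_stage above stops query processing at the given step, which is why each stage produces a different header and value in the reference file. A rough standalone loop over the same stages:

#!/usr/bin/env bash
# Run the same query up to each processing stage; stage names are those used in the test above.
CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:-clickhouse-local}

for stage in fetch_columns with_mergeable_state with_mergeable_state_after_aggregation complete; do
    echo "stage: $stage"
    $CLICKHOUSE_LOCAL --allow_experimental_analyzer=1 --stage "$stage" --format CSVWithNames -q "SELECT 1 AS foo"
done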
b/tests/queries/0_stateless/02125_query_views_log_window_function.sql @@ -1,4 +1,6 @@ +set allow_experimental_analyzer = 0; set allow_experimental_window_view = 1; + CREATE TABLE data ( `id` UInt64, `timestamp` DateTime) ENGINE = Memory; CREATE WINDOW VIEW wv Engine Memory as select count(id), tumbleStart(w_id) as window_start from data group by tumble(timestamp, INTERVAL '10' SECOND) as w_id; diff --git a/tests/queries/0_stateless/02153_native_bounds_check.reference b/tests/queries/0_stateless/02153_native_bounds_check.reference deleted file mode 100644 index d00491fd7e5..00000000000 --- a/tests/queries/0_stateless/02153_native_bounds_check.reference +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/tests/queries/0_stateless/02153_native_bounds_check.sh b/tests/queries/0_stateless/02153_native_bounds_check.sh deleted file mode 100755 index a3475ddacae..00000000000 --- a/tests/queries/0_stateless/02153_native_bounds_check.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# Should correctly handle error. - -${CLICKHOUSE_LOCAL} --query "SELECT toString(number) AS a, toString(number) AS a FROM numbers(10)" --output-format Native | - ${CLICKHOUSE_LOCAL} --query "SELECT * FROM table" --input-format Native --structure 'a LowCardinality(String)' 2>&1 | - grep -c -F Exception diff --git a/tests/queries/0_stateless/02160_untuple_exponential_growth.sh b/tests/queries/0_stateless/02160_untuple_exponential_growth.sh index 9ec6594af69..2bc8f74a524 100755 --- a/tests/queries/0_stateless/02160_untuple_exponential_growth.sh +++ b/tests/queries/0_stateless/02160_untuple_exponential_growth.sh @@ -7,5 +7,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Should finish in reasonable time (milliseconds). # In previous versions this query led to exponential complexity of query analysis. 
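Window views are still tied to the old analyzer, which is why the test above pins allow_experimental_analyzer = 0 right next to allow_experimental_window_view = 1. A hedged sketch of the same pairing, with the table and view taken from the test (cleanup added for illustration):

#!/usr/bin/env bash
# Create and drop a window view with the old analyzer, mirroring the settings in the test above.
CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:-clickhouse-client}

$CLICKHOUSE_CLIENT -nq "
    SET allow_experimental_analyzer = 0;
    SET allow_experimental_window_view = 1;
    CREATE TABLE data (id UInt64, timestamp DateTime) ENGINE = Memory;
    CREATE WINDOW VIEW wv ENGINE Memory AS
        SELECT count(id), tumbleStart(w_id) AS window_start
        FROM data
        GROUP BY tumble(timestamp, INTERVAL '10' SECOND) AS w_id;
    DROP TABLE wv;
    DROP TABLE data;
"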
-${CLICKHOUSE_LOCAL} --query "SELECT untuple(tuple(untuple((1, untuple((untuple(tuple(untuple(tuple(untuple((untuple((1, 1, 1, 1)), 1, 1, 1)))))), 1, 1))))))" 2>&1 | grep -cF 'TOO_BIG_AST' -${CLICKHOUSE_LOCAL} --query "SELECT untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple((1, 1, 1, 1, 1))))))))))))))))))))))))))" 2>&1 | grep -cF 'TOO_BIG_AST' +${CLICKHOUSE_LOCAL} --query "SELECT untuple(tuple(untuple((1, untuple((untuple(tuple(untuple(tuple(untuple((untuple((1, 1, 1, 1)), 1, 1, 1)))))), 1, 1))))))" 2>&1 | grep -cF 'too big' +${CLICKHOUSE_LOCAL} --query "SELECT untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple((1, 1, 1, 1, 1))))))))))))))))))))))))))" 2>&1 | grep -cF 'too big' diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache.reference b/tests/queries/0_stateless/02174_cte_scalar_cache.reference index 817116eda88..1acbef35325 100644 --- a/tests/queries/0_stateless/02174_cte_scalar_cache.reference +++ b/tests/queries/0_stateless/02174_cte_scalar_cache.reference @@ -1,3 +1,3 @@ -02177_CTE_GLOBAL_ON 5 500 11 0 5 -02177_CTE_GLOBAL_OFF 1 100 5 0 1 -02177_CTE_NEW_ANALYZER 2 200 3 0 2 +02177_CTE_GLOBAL_ON 1 100 4 0 1 +02177_CTE_GLOBAL_OFF 1 100 4 0 1 +02177_CTE_NEW_ANALYZER 1 100 4 0 1 diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache.sql b/tests/queries/0_stateless/02174_cte_scalar_cache.sql index 9ed80d08cff..50a10834e64 100644 --- a/tests/queries/0_stateless/02174_cte_scalar_cache.sql +++ b/tests/queries/0_stateless/02174_cte_scalar_cache.sql @@ -1,3 +1,5 @@ +SET allow_experimental_analyzer = 1; + WITH ( SELECT sleep(0.0001) FROM system.one ) as a1, ( SELECT sleep(0.0001) FROM system.one ) as a2, diff --git a/tests/queries/0_stateless/02184_hash_functions_and_ip_types.reference b/tests/queries/0_stateless/02184_hash_functions_and_ip_types.reference index 07705827428..b305806cd08 100644 --- a/tests/queries/0_stateless/02184_hash_functions_and_ip_types.reference +++ b/tests/queries/0_stateless/02184_hash_functions_and_ip_types.reference @@ -1,54 +1,54 @@ Row 1: ────── -ipv4: 1.2.3.4 -halfMD5(toIPv4('1.2.3.4')): 14356538739656272800 -farmFingerprint64(toIPv4('1.2.3.4')): 5715546585361069049 -xxh3(toIPv4('1.2.3.4')): 14355428563589734825 -wyHash64(toIPv4('1.2.3.4')): 13096729196120951355 -xxHash32(toIPv4('1.2.3.4')): 2430391091 -gccMurmurHash(toIPv4('1.2.3.4')): 5478801830569062645 -murmurHash2_32(toIPv4('1.2.3.4')): 1658978282 -javaHashUTF16LE(toIPv4('1.2.3.4')): 24190 -intHash64(toIPv4('1.2.3.4')): 5715546585361069049 -intHash32(toIPv4('1.2.3.4')): 3152671896 -metroHash64(toIPv4('1.2.3.4')): 5715546585361069049 -hex(murmurHash3_128(toIPv4('1.2.3.4'))): 549E9EF692591F6BB55874EF9A0DE88E -jumpConsistentHash(toIPv4('1.2.3.4'), 42): 37 -sipHash64(toIPv4('1.2.3.4')): 10711397536826262068 -hex(sipHash128(toIPv4('1.2.3.4'))): DBB6A76B92B59789EFB42185DC32311D -kostikConsistentHash(toIPv4('1.2.3.4'), 42): 0 -xxHash64(toIPv4('1.2.3.4')): 14496144933713060978 -murmurHash2_64(toIPv4('1.2.3.4')): 10829690723193326442 -cityHash64(toIPv4('1.2.3.4')): 5715546585361069049 -hiveHash(toIPv4('1.2.3.4')): 122110 -murmurHash3_64(toIPv4('1.2.3.4')): 16570805747704317665 -murmurHash3_32(toIPv4('1.2.3.4')): 1165084099 -yandexConsistentHash(toIPv4('1.2.3.4'), 42): 0 +ipv4: 1.2.3.4 +halfMD5(ipv4): 14356538739656272800 +farmFingerprint64(ipv4): 
5715546585361069049 +xxh3(ipv4): 14355428563589734825 +wyHash64(ipv4): 13096729196120951355 +xxHash32(ipv4): 2430391091 +gccMurmurHash(ipv4): 5478801830569062645 +murmurHash2_32(ipv4): 1658978282 +javaHashUTF16LE(ipv4): 24190 +intHash64(ipv4): 5715546585361069049 +intHash32(ipv4): 3152671896 +metroHash64(ipv4): 5715546585361069049 +hex(murmurHash3_128(ipv4)): 549E9EF692591F6BB55874EF9A0DE88E +jumpConsistentHash(ipv4, 42): 37 +sipHash64(ipv4): 10711397536826262068 +hex(sipHash128(ipv4)): DBB6A76B92B59789EFB42185DC32311D +kostikConsistentHash(ipv4, 42): 0 +xxHash64(ipv4): 14496144933713060978 +murmurHash2_64(ipv4): 10829690723193326442 +cityHash64(ipv4): 5715546585361069049 +hiveHash(ipv4): 122110 +murmurHash3_64(ipv4): 16570805747704317665 +murmurHash3_32(ipv4): 1165084099 +yandexConsistentHash(ipv4, 42): 0 Row 1: ────── -ipv6: fe80::62:5aff:fed1:daf0 -halfMD5(toIPv6('fe80::62:5aff:fed1:daf0')): 9503062220758009199 -hex(MD4(toIPv6('fe80::62:5aff:fed1:daf0'))): E35A1A4FB3A3953421AB348B2E1A4A1A -hex(MD5(toIPv6('fe80::62:5aff:fed1:daf0'))): 83E1A8BD8AB7456FC229208409F79798 -hex(SHA1(toIPv6('fe80::62:5aff:fed1:daf0'))): A6D5DCE882AC44804382DE4639E6001612E1C8B5 -hex(SHA224(toIPv6('fe80::62:5aff:fed1:daf0'))): F6995FD7BED2BCA21F68DAC6BBABE742DC1BA177BA8594CEF1715C52 -hex(SHA256(toIPv6('fe80::62:5aff:fed1:daf0'))): F75497BAD6F7747BD6B150B6F69BA2DEE354F1C2A34B7BEA6183973B78640250 -hex(SHA512(toIPv6('fe80::62:5aff:fed1:daf0'))): 0C2893CCBF44BC19CCF339AEED5B68CBFD5A2EF38263A48FE21C3379BA4438E7FF7A02F59D7542442C6E6ED538E6D13D65D3573DADB381651D3D8A5DEA232EAC -farmFingerprint64(toIPv6('fe80::62:5aff:fed1:daf0')): 6643158734288374888 -javaHash(toIPv6('fe80::62:5aff:fed1:daf0')): 684606770 -xxh3(toIPv6('fe80::62:5aff:fed1:daf0')): 4051340969481364358 -wyHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 18071806066582739916 -xxHash32(toIPv6('fe80::62:5aff:fed1:daf0')): 3353862080 -gccMurmurHash(toIPv6('fe80::62:5aff:fed1:daf0')): 11049311547848936878 -murmurHash2_32(toIPv6('fe80::62:5aff:fed1:daf0')): 1039121047 -javaHashUTF16LE(toIPv6('fe80::62:5aff:fed1:daf0')): -666938696 -metroHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 15333045864940909774 -hex(sipHash128(toIPv6('fe80::62:5aff:fed1:daf0'))): 31D50562F877B1F92A99B05B646568B7 -hex(murmurHash3_128(toIPv6('fe80::62:5aff:fed1:daf0'))): 6FFEF0C1DF8B5B472FE2EDF0C76C12B9 -sipHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 5681592867096972315 -xxHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 4533874364641685764 -murmurHash2_64(toIPv6('fe80::62:5aff:fed1:daf0')): 11839090601505681839 -cityHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 1599722731594796935 -hiveHash(toIPv6('fe80::62:5aff:fed1:daf0')): 684606770 -murmurHash3_64(toIPv6('fe80::62:5aff:fed1:daf0')): 18323430650022796352 -murmurHash3_32(toIPv6('fe80::62:5aff:fed1:daf0')): 3971193740 +ipv6: fe80::62:5aff:fed1:daf0 +halfMD5(ipv6): 9503062220758009199 +hex(MD4(ipv6)): E35A1A4FB3A3953421AB348B2E1A4A1A +hex(MD5(ipv6)): 83E1A8BD8AB7456FC229208409F79798 +hex(SHA1(ipv6)): A6D5DCE882AC44804382DE4639E6001612E1C8B5 +hex(SHA224(ipv6)): F6995FD7BED2BCA21F68DAC6BBABE742DC1BA177BA8594CEF1715C52 +hex(SHA256(ipv6)): F75497BAD6F7747BD6B150B6F69BA2DEE354F1C2A34B7BEA6183973B78640250 +hex(SHA512(ipv6)): 0C2893CCBF44BC19CCF339AEED5B68CBFD5A2EF38263A48FE21C3379BA4438E7FF7A02F59D7542442C6E6ED538E6D13D65D3573DADB381651D3D8A5DEA232EAC +farmFingerprint64(ipv6): 6643158734288374888 +javaHash(ipv6): 684606770 +xxh3(ipv6): 4051340969481364358 +wyHash64(ipv6): 18071806066582739916 +xxHash32(ipv6): 3353862080 +gccMurmurHash(ipv6): 11049311547848936878 
+murmurHash2_32(ipv6): 1039121047 +javaHashUTF16LE(ipv6): -666938696 +metroHash64(ipv6): 15333045864940909774 +hex(sipHash128(ipv6)): 31D50562F877B1F92A99B05B646568B7 +hex(murmurHash3_128(ipv6)): 6FFEF0C1DF8B5B472FE2EDF0C76C12B9 +sipHash64(ipv6): 5681592867096972315 +xxHash64(ipv6): 4533874364641685764 +murmurHash2_64(ipv6): 11839090601505681839 +cityHash64(ipv6): 1599722731594796935 +hiveHash(ipv6): 684606770 +murmurHash3_64(ipv6): 18323430650022796352 +murmurHash3_32(ipv6): 3971193740 diff --git a/tests/queries/0_stateless/02184_hash_functions_and_ip_types.sql b/tests/queries/0_stateless/02184_hash_functions_and_ip_types.sql index 67aae812144..d96574ef4fe 100644 --- a/tests/queries/0_stateless/02184_hash_functions_and_ip_types.sql +++ b/tests/queries/0_stateless/02184_hash_functions_and_ip_types.sql @@ -1,5 +1,7 @@ -- Tags: no-fasttest +SET allow_experimental_analyzer = 1; + SELECT toIPv4('1.2.3.4') AS ipv4, halfMD5(ipv4), diff --git a/tests/queries/0_stateless/02205_HTTP_user_agent.python b/tests/queries/0_stateless/02205_HTTP_user_agent.python index 397e06cbe82..0d3a563c094 100644 --- a/tests/queries/0_stateless/02205_HTTP_user_agent.python +++ b/tests/queries/0_stateless/02205_HTTP_user_agent.python @@ -125,7 +125,7 @@ def test_select(): def main(): # HEAD + GET - t = start_server(2) + t = start_server(3) t.start() test_select() t.join() diff --git a/tests/queries/0_stateless/02227_union_match_by_name.reference b/tests/queries/0_stateless/02227_union_match_by_name.reference index cebcc42dcba..e51ea983f7f 100644 --- a/tests/queries/0_stateless/02227_union_match_by_name.reference +++ b/tests/queries/0_stateless/02227_union_match_by_name.reference @@ -1,40 +1,53 @@ --- { echo } +-- { echoOn } + EXPLAIN header = 1, optimize = 0 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y); -Expression (Projection) +Expression (Project names) Header: avgWeighted(x, y) Nullable(Float64) - Expression (Before ORDER BY) - Header: avgWeighted(x, y) Nullable(Float64) + Expression (Projection) + Header: avgWeighted(x_0, y_1) Nullable(Float64) Aggregating - Header: avgWeighted(x, y) Nullable(Float64) + Header: avgWeighted(x_0, y_1) Nullable(Float64) Expression (Before GROUP BY) - Header: x Nullable(UInt8) - y UInt8 - Union - Header: x Nullable(UInt8) - y UInt8 - Expression (Conversion before UNION) - Header: x Nullable(UInt8) + Header: x_0 Nullable(UInt8) + y_1 UInt8 + Expression (Change column names to column identifiers) + Header: x_0 Nullable(UInt8) + y_1 UInt8 + Union + Header: NULL Nullable(UInt8) + x Nullable(UInt8) y UInt8 - Expression (Projection) - Header: x UInt8 + Expression (Conversion before UNION) + Header: NULL Nullable(UInt8) + x Nullable(UInt8) y UInt8 - Expression (Before ORDER BY) - Header: 255 UInt8 - 1 UInt8 - dummy UInt8 - ReadFromStorage (SystemOne) - Header: dummy UInt8 - Expression (Conversion before UNION) - Header: x Nullable(UInt8) - y UInt8 - Expression (Projection) - Header: x Nullable(Nothing) + Expression (Project names) + Header: NULL Nullable(Nothing) + x UInt8 + y UInt8 + Expression (Projection) + Header: NULL_Nullable(Nothing) Nullable(Nothing) + 255_UInt8 UInt8 + 1_UInt8 UInt8 + Expression (Change column names to column identifiers) + Header: system.one.dummy_0 UInt8 + ReadFromStorage (SystemOne) + Header: dummy UInt8 + Expression (Conversion before UNION) + Header: NULL Nullable(UInt8) + x Nullable(UInt8) y UInt8 - Expression (Before ORDER BY) - Header: 1 UInt8 - NULL Nullable(Nothing) - dummy UInt8 - ReadFromStorage 
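With the analyzer enabled, the Vertical output above labels each column with the alias (ipv4, ipv6) instead of repeating the full toIPv4(...)/toIPv6(...) expression. A cut-down check using a subset of the functions from the test:

#!/usr/bin/env bash
# Column headings follow the alias when the analyzer is on.
CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:-clickhouse-local}

$CLICKHOUSE_LOCAL --allow_experimental_analyzer=1 -q "
    SELECT
        toIPv4('1.2.3.4') AS ipv4,
        cityHash64(ipv4),
        xxHash32(ipv4)
    FORMAT Vertical
"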
(SystemOne) - Header: dummy UInt8 + Expression (Project names) + Header: y UInt8 + x Nullable(Nothing) + y UInt8 + Expression (Projection) + Header: 1_UInt8 UInt8 + NULL_Nullable(Nothing) Nullable(Nothing) + 1_UInt8 UInt8 + Expression (Change column names to column identifiers) + Header: system.one.dummy_0 UInt8 + ReadFromStorage (SystemOne) + Header: dummy UInt8 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y); 255 diff --git a/tests/queries/0_stateless/02227_union_match_by_name.sql b/tests/queries/0_stateless/02227_union_match_by_name.sql index cc0ab8ba5aa..6a19add1d37 100644 --- a/tests/queries/0_stateless/02227_union_match_by_name.sql +++ b/tests/queries/0_stateless/02227_union_match_by_name.sql @@ -1,3 +1,8 @@ --- { echo } +SET allow_experimental_analyzer = 1; + +-- { echoOn } + EXPLAIN header = 1, optimize = 0 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y); SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y); + +-- { echoOff } diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index b563c487646..991622446b8 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -16,7 +16,7 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do ORDER BY key SETTINGS storage_policy='$STORAGE_POLICY', min_bytes_for_wide_part = 10485760" - $CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES" + $CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES test_02286" $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" diff --git a/tests/queries/0_stateless/02303_query_kind.reference b/tests/queries/0_stateless/02303_query_kind.reference index 163f8b0ed5e..5af8c2b743f 100644 --- a/tests/queries/0_stateless/02303_query_kind.reference +++ b/tests/queries/0_stateless/02303_query_kind.reference @@ -1,36 +1,36 @@ -clickhouse-client --query_kind secondary_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy -Expression ((Projection + Before ORDER BY)) +clickhouse-client --allow_experimental_analyzer=1 --query_kind secondary_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Project names + Projection)) Header: dummy String Aggregating - Header: toString(dummy) String - Expression (Before GROUP BY) - Header: toString(dummy) String + Header: toString(system.one.dummy_0) String + Expression ((Before GROUP BY + Change column names to column identifiers)) + Header: toString(system.one.dummy_0) String ReadFromStorage (SystemOne) Header: dummy UInt8 -clickhouse-local --query_kind secondary_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy -Expression ((Projection + Before ORDER BY)) +clickhouse-local --allow_experimental_analyzer=1 --query_kind secondary_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Project names + Projection)) Header: dummy String Aggregating - Header: toString(dummy) String - Expression (Before GROUP BY) - Header: toString(dummy) String + Header: toString(system.one.dummy_0) String + Expression ((Before GROUP BY + Change column names to column identifiers)) + Header: toString(system.one.dummy_0) String ReadFromStorage (SystemOne) Header: 
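Scoping SYSTEM STOP MERGES to one table, as 02286_drop_filesystem_cache.sh now does above, leaves merges running for unrelated tables on the same server. A small sketch of the paired stop/start (the table name is the one from the test and is illustrative; the table must already exist):

#!/usr/bin/env bash
# Stop merges only for one table, run a check, then re-enable them.
CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:-clickhouse-client}

$CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES test_02286"
$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.filesystem_cache"
$CLICKHOUSE_CLIENT -q "SYSTEM START MERGES test_02286"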
dummy UInt8 -clickhouse-client --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy -Expression ((Projection + Before ORDER BY)) +clickhouse-client --allow_experimental_analyzer=1 --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Project names + Projection)) Header: dummy String Aggregating - Header: toString(dummy) String - Expression (Before GROUP BY) - Header: toString(dummy) String + Header: toString(system.one.dummy_0) String + Expression ((Before GROUP BY + Change column names to column identifiers)) + Header: toString(system.one.dummy_0) String ReadFromStorage (SystemOne) Header: dummy UInt8 -clickhouse-local --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy -Expression ((Projection + Before ORDER BY)) +clickhouse-local --allow_experimental_analyzer=1 --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Project names + Projection)) Header: dummy String Aggregating - Header: toString(dummy) String - Expression (Before GROUP BY) - Header: toString(dummy) String + Header: toString(system.one.dummy_0) String + Expression ((Before GROUP BY + Change column names to column identifiers)) + Header: toString(system.one.dummy_0) String ReadFromStorage (SystemOne) Header: dummy UInt8 diff --git a/tests/queries/0_stateless/02303_query_kind.sh b/tests/queries/0_stateless/02303_query_kind.sh index 5ad5f9ec6f4..1d883a2dcc7 100755 --- a/tests/queries/0_stateless/02303_query_kind.sh +++ b/tests/queries/0_stateless/02303_query_kind.sh @@ -4,6 +4,10 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh +opts=( + "--allow_experimental_analyzer=1" +) + function run_query() { echo "clickhouse-client $*" @@ -12,5 +16,5 @@ function run_query() echo "clickhouse-local $*" $CLICKHOUSE_LOCAL "$@" } -run_query --query_kind secondary_query -q "explain plan header=1 select toString(dummy) as dummy from system.one group by dummy" -run_query --query_kind initial_query -q "explain plan header=1 select toString(dummy) as dummy from system.one group by dummy" +run_query "${opts[@]}" --query_kind secondary_query -q "explain plan header=1 select toString(dummy) as dummy from system.one group by dummy" +run_query "${opts[@]}" --query_kind initial_query -q "explain plan header=1 select toString(dummy) as dummy from system.one group by dummy" diff --git a/tests/queries/0_stateless/02342_window_view_different_struct.sql b/tests/queries/0_stateless/02342_window_view_different_struct.sql index c5bf8899cae..a5b2b8daa5a 100644 --- a/tests/queries/0_stateless/02342_window_view_different_struct.sql +++ b/tests/queries/0_stateless/02342_window_view_different_struct.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer = 0; SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS data_02342; diff --git a/tests/queries/0_stateless/02345_implicit_transaction.reference b/tests/queries/0_stateless/02345_implicit_transaction.reference index e4dd35600f7..fb4254ec5a7 100644 --- a/tests/queries/0_stateless/02345_implicit_transaction.reference +++ b/tests/queries/0_stateless/02345_implicit_transaction.reference @@ -12,3 +12,6 @@ in_transaction 10000 out_transaction 0 {"'implicit_True'":"implicit_True","all":"2","is_empty":0} {"'implicit_False'":"implicit_False","all":"2","is_empty":1} +0 +0 +0 diff --git a/tests/queries/0_stateless/02345_implicit_transaction.sql b/tests/queries/0_stateless/02345_implicit_transaction.sql index e3f9cca37d1..b0cb4ab6305 100644 --- a/tests/queries/0_stateless/02345_implicit_transaction.sql +++ b/tests/queries/0_stateless/02345_implicit_transaction.sql @@ -1,4 +1,4 @@ --- Tags: no-ordinary-database +-- Tags: no-ordinary-database, no-fasttest CREATE TABLE landing (n Int64) engine=MergeTree order by n; CREATE TABLE target (n Int64) engine=MergeTree order by n; @@ -92,3 +92,13 @@ WHERE query LIKE '-- Verify that the transaction_id column is NOT populated without transaction%' GROUP BY transaction_id FORMAT JSONEachRow; + +SET implicit_transaction=1; +SET throw_on_unsupported_query_inside_transaction=1; +SELECT * FROM system.one; +SELECT * FROM cluster('test_cluster_interserver_secret', system, one); -- { serverError NOT_IMPLEMENTED } +SELECT * FROM cluster('test_cluster_two_shards', system, one); -- { serverError NOT_IMPLEMENTED } +SET throw_on_unsupported_query_inside_transaction=0; +-- there's not session in the interserver mode +SELECT * FROM cluster('test_cluster_interserver_secret', system, one) FORMAT Null; -- { serverError INVALID_TRANSACTION } +SELECT * FROM cluster('test_cluster_two_shards', system, one); diff --git a/tests/queries/0_stateless/02364_window_view_segfault.sh b/tests/queries/0_stateless/02364_window_view_segfault.sh index d03a1e5ae3e..3def22f4a9e 100755 --- a/tests/queries/0_stateless/02364_window_view_segfault.sh +++ b/tests/queries/0_stateless/02364_window_view_segfault.sh @@ -5,7 +5,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery --multiline --query """ +opts=( + "--allow_experimental_analyzer=0" +) + +${CLICKHOUSE_CLIENT} "${opts[@]}" --multiquery --multiline --query """ DROP TABLE IF EXISTS mt ON CLUSTER test_shard_localhost; DROP TABLE IF EXISTS wv ON CLUSTER test_shard_localhost; CREATE TABLE mt ON CLUSTER test_shard_localhost (a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); diff --git a/tests/queries/0_stateless/02371_select_projection_normal_agg.sql b/tests/queries/0_stateless/02371_select_projection_normal_agg.sql index 283aec0b122..8650fb6b843 100644 --- a/tests/queries/0_stateless/02371_select_projection_normal_agg.sql +++ b/tests/queries/0_stateless/02371_select_projection_normal_agg.sql @@ -11,7 +11,8 @@ CREATE TABLE video_log ) ENGINE = MergeTree PARTITION BY toDate(datetime) -ORDER BY (user_id, device_id); +ORDER BY (user_id, device_id) +SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; DROP TABLE IF EXISTS rng; @@ -57,7 +58,8 @@ CREATE TABLE video_log_result ) ENGINE = MergeTree PARTITION BY toDate(hour) -ORDER BY sum_bytes; +ORDER BY sum_bytes +SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO video_log_result SELECT toStartOfHour(datetime) AS hour, diff --git a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference index bbf288c45d7..31a37862663 100644 --- a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference +++ b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference @@ -2,51 +2,51 @@ Expression Header: key String value String Join - Header: key String - value String + Header: s1.key_0 String + s2.value_1 String Expression - Header: key String + Header: s1.key_0 String ReadFromStorage Header: dummy UInt8 Union - Header: s2.key String - value String + Header: s2.key_2 String + s2.value_1 String Expression - Header: s2.key String - value String + Header: s2.key_2 String + s2.value_1 String ReadFromStorage Header: dummy UInt8 Expression - Header: s2.key String - value String + Header: s2.key_2 String + s2.value_1 String ReadFromStorage Header: dummy UInt8 Expression Header: key String value String Join - Header: key String - s2.key String - value String + Header: s1.key_0 String + s2.key_2 String + s2.value_1 String Sorting - Header: key String + Header: s1.key_0 String Expression - Header: key String + Header: s1.key_0 String ReadFromStorage Header: dummy UInt8 Sorting - Header: s2.key String - value String + Header: s2.key_2 String + s2.value_1 String Union - Header: s2.key String - value String + Header: s2.key_2 String + s2.value_1 String Expression - Header: s2.key String - value String + Header: s2.key_2 String + s2.value_1 String ReadFromStorage Header: dummy UInt8 Expression - Header: s2.key String - value String + Header: s2.key_2 String + s2.value_1 String ReadFromStorage Header: dummy UInt8 diff --git a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql index 4ed6d965292..dfcd8c12e11 100644 --- a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql +++ b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer = 1; SET join_algorithm = 'hash'; EXPLAIN actions=0, description=0, header=1 diff --git a/tests/queries/0_stateless/02402_external_disk_mertrics.sql b/tests/queries/0_stateless/02402_external_disk_mertrics.sql index 
b675c05f45c..e9696eb7122 100644 --- a/tests/queries/0_stateless/02402_external_disk_mertrics.sql +++ b/tests/queries/0_stateless/02402_external_disk_mertrics.sql @@ -20,7 +20,8 @@ SET join_algorithm = 'partial_merge'; SET default_max_bytes_in_join = 0; SET max_bytes_in_join = 10000000; -SELECT number * 200000 as n, j * 2097152 FROM numbers(5) nums +SELECT n, j * 2097152 FROM +(SELECT number * 200000 as n FROM numbers(5)) nums ANY LEFT JOIN ( SELECT number * 2 AS n, number AS j FROM numbers(1000000) ) js2 USING n ORDER BY n diff --git a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference index 81a1f62ca29..312904a3cf9 100644 --- a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference +++ b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference @@ -1,8 +1,18 @@ +===1=== {"operation_name":"void DB::DistributedSink::writeToLocal(const Cluster::ShardInfo &, const DB::Block &, size_t)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} {"operation_name":"void DB::DistributedSink::writeToLocal(const Cluster::ShardInfo &, const DB::Block &, size_t)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} +1 +===2=== {"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} {"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} +3 +2 +===3=== {"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} {"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} +1 +===4=== {"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} {"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} +3 +2 diff --git a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh index 9ac5f061d4a..81f3e3f4ea6 100755 --- a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh +++ b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh @@ -42,6 +42,22 @@ ${CLICKHOUSE_CLIENT} -nq " ;" } +# +# $1 - OpenTelemetry Trace Id +# $2 - value of insert_distributed_sync +function check_span_kind() +{ +${CLICKHOUSE_CLIENT} -nq " + SYSTEM FLUSH LOGS; + + SELECT count() + FROM system.opentelemetry_span_log + WHERE finish_date >= yesterday() + AND lower(hex(trace_id)) = '${1}' + AND kind = '${2}' + ;" +} + # # Prepare tables for tests @@ -57,30 +73,46 @@ CREATE TABLE 
${CLICKHOUSE_DATABASE}.local_opentelemetry (key UInt64) Engine=Merg # # test1 # +echo "===1===" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); insert $trace_id 0 1 "async-insert-writeToLocal" check_span $trace_id +# 1 HTTP SERVER spans +check_span_kind $trace_id 'SERVER' # # test2 # +echo "===2===" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); insert $trace_id 0 0 "async-insert-writeToRemote" check_span $trace_id +# 3 SERVER spans, 1 for HTTP, 2 for TCP +check_span_kind $trace_id 'SERVER' +# 2 CLIENT spans +check_span_kind $trace_id 'CLIENT' # # test3 # trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); insert $trace_id 1 1 "sync-insert-writeToLocal" +echo "===3===" check_span $trace_id +# 1 HTTP SERVER spans +check_span_kind $trace_id 'SERVER' # # test4 # +echo "===4===" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); insert $trace_id 1 0 "sync-insert-writeToRemote" check_span $trace_id +# 3 SERVER spans, 1 for HTTP, 2 for TCP +check_span_kind $trace_id 'SERVER' +# 2 CLIENT spans +check_span_kind $trace_id 'CLIENT' # # Cleanup diff --git a/tests/queries/0_stateless/02420_final_setting_analyzer.reference b/tests/queries/0_stateless/02420_final_setting_analyzer.reference index ee7c2541bcf..9a03c484765 100644 --- a/tests/queries/0_stateless/02420_final_setting_analyzer.reference +++ b/tests/queries/0_stateless/02420_final_setting_analyzer.reference @@ -108,9 +108,6 @@ select left_table.id,val_left, val_middle, val_right from left_table ORDER BY left_table.id, val_left, val_middle, val_right; 1 c a c 1 c b c --- no distributed tests because it is not currently supported: --- JOIN with remote storages is unsupported. - -- Quite exotic with Merge engine DROP TABLE IF EXISTS table_to_merge_a; DROP TABLE IF EXISTS table_to_merge_b; diff --git a/tests/queries/0_stateless/02420_final_setting_analyzer.sql b/tests/queries/0_stateless/02420_final_setting_analyzer.sql index 5937e536239..14c832cfaf5 100644 --- a/tests/queries/0_stateless/02420_final_setting_analyzer.sql +++ b/tests/queries/0_stateless/02420_final_setting_analyzer.sql @@ -79,9 +79,6 @@ select left_table.id,val_left, val_middle, val_right from left_table inner join (SELECT * FROM right_table WHERE id = 1) r on middle_table.id = r.id ORDER BY left_table.id, val_left, val_middle, val_right; --- no distributed tests because it is not currently supported: --- JOIN with remote storages is unsupported. 
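The check_span_kind helper added above simply counts spans of a given kind for one trace; the underlying query can be run directly. A sketch with a placeholder trace id (substitute a real one generated by the test):

#!/usr/bin/env bash
# Count SERVER vs CLIENT spans for a single trace id.
CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:-clickhouse-client}
trace_id="00000000000000000000000000000000"  # placeholder

for kind in SERVER CLIENT; do
    echo "kind: $kind"
    $CLICKHOUSE_CLIENT -nq "
        SYSTEM FLUSH LOGS;
        SELECT count()
        FROM system.opentelemetry_span_log
        WHERE finish_date >= yesterday()
          AND lower(hex(trace_id)) = '${trace_id}'
          AND kind = '${kind}';
    "
done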
- -- Quite exotic with Merge engine DROP TABLE IF EXISTS table_to_merge_a; DROP TABLE IF EXISTS table_to_merge_b; diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index 6164ff97d9f..b2a5ebdd4ad 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -144,8 +144,8 @@ else # For Replicated database it will fail on initiator before enqueueing distributed DDL expected=0 fi -check_span $expected $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" -check_span $expected $trace_id "%DDLWorker::processTask%" +check_span $expected $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}' AND kind = 'PRODUCER'" +check_span $expected $trace_id "%DDLWorker::processTask%" "kind = 'CONSUMER'" if [ $cluster_name = "test_shard_localhost" ]; then # There should be two 'query' spans, one is for the HTTPHandler, the other is for the DDL executing in DDLWorker. diff --git a/tests/queries/0_stateless/02426_orc_bug.reference b/tests/queries/0_stateless/02426_orc_bug.reference index e5ad2b49289..baa88da2158 100644 Binary files a/tests/queries/0_stateless/02426_orc_bug.reference and b/tests/queries/0_stateless/02426_orc_bug.reference differ diff --git a/tests/queries/0_stateless/02426_orc_bug.sh b/tests/queries/0_stateless/02426_orc_bug.sh new file mode 100755 index 00000000000..7a7ad9f1783 --- /dev/null +++ b/tests/queries/0_stateless/02426_orc_bug.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="SELECT arrayJoin([[], [1]]) FORMAT ORC SETTINGS output_format_orc_compression_method='none'" | md5sum; + diff --git a/tests/queries/0_stateless/02426_orc_bug.sql b/tests/queries/0_stateless/02426_orc_bug.sql deleted file mode 100644 index 7016f1ceb70..00000000000 --- a/tests/queries/0_stateless/02426_orc_bug.sql +++ /dev/null @@ -1,3 +0,0 @@ --- Tags: no-fasttest - -SELECT arrayJoin([[], [1]]) FORMAT ORC; diff --git a/tests/queries/0_stateless/02451_order_by_monotonic.reference b/tests/queries/0_stateless/02451_order_by_monotonic.reference index d3de324a7e1..f9f0ef38be1 100644 --- a/tests/queries/0_stateless/02451_order_by_monotonic.reference +++ b/tests/queries/0_stateless/02451_order_by_monotonic.reference @@ -4,19 +4,19 @@ 2022-09-09 12:00:00 0x 2022-09-09 12:00:00 1 2022-09-09 12:00:00 1x - Prefix sort description: toStartOfMinute(t) ASC - Result sort description: toStartOfMinute(t) ASC, c1 ASC - Prefix sort description: toStartOfMinute(t) ASC - Result sort description: toStartOfMinute(t) ASC - Prefix sort description: negate(a) ASC - Result sort description: negate(a) ASC - Prefix sort description: negate(a) ASC, negate(b) ASC - Result sort description: negate(a) ASC, negate(b) ASC - Prefix sort description: a DESC, negate(b) ASC - Result sort description: a DESC, negate(b) ASC - Prefix sort description: negate(a) ASC, b DESC - Result sort description: negate(a) ASC, b DESC - Prefix sort description: negate(a) ASC - Result sort description: negate(a) ASC, b ASC - Prefix sort description: a ASC - Result sort description: a ASC, negate(b) ASC + Prefix sort description: toStartOfMinute(test.t_0) ASC + Result sort description: toStartOfMinute(test.t_0) ASC, test.c1_1 ASC + Prefix sort description: toStartOfMinute(test.t_0) 
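The ORC test above moves from a plain .sql reference to a checksum comparison: the binary ORC output is piped through md5sum, with compression disabled so the digest stays stable. Roughly, using clickhouse-local instead of the client:

#!/usr/bin/env bash
# Hash the uncompressed ORC output so a reference file can hold a stable checksum.
CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:-clickhouse-local}

$CLICKHOUSE_LOCAL -q "
    SELECT arrayJoin([[], [1]])
    FORMAT ORC
    SETTINGS output_format_orc_compression_method = 'none'
" | md5sum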
ASC + Result sort description: toStartOfMinute(test.t_0) ASC + Prefix sort description: negate(test.a_0) ASC + Result sort description: negate(test.a_0) ASC + Prefix sort description: negate(test.a_0) ASC, negate(test.b_1) ASC + Result sort description: negate(test.a_0) ASC, negate(test.b_1) ASC + Prefix sort description: test.a_0 DESC, negate(test.b_1) ASC + Result sort description: test.a_0 DESC, negate(test.b_1) ASC + Prefix sort description: negate(test.a_0) ASC, test.b_1 DESC + Result sort description: negate(test.a_0) ASC, test.b_1 DESC + Prefix sort description: negate(test.a_0) ASC + Result sort description: negate(test.a_0) ASC, test.b_1 ASC + Prefix sort description: test.a_0 ASC + Result sort description: test.a_0 ASC, negate(test.b_1) ASC diff --git a/tests/queries/0_stateless/02451_order_by_monotonic.sh b/tests/queries/0_stateless/02451_order_by_monotonic.sh index cc26ba91e1c..7d1356b4445 100755 --- a/tests/queries/0_stateless/02451_order_by_monotonic.sh +++ b/tests/queries/0_stateless/02451_order_by_monotonic.sh @@ -4,37 +4,41 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +opts=( + "--allow_experimental_analyzer=1" +) + function explain_sort_description() { - out=$($CLICKHOUSE_CLIENT --optimize_read_in_order=1 -q "EXPLAIN PLAN actions = 1 $1") + out=$($CLICKHOUSE_CLIENT "${opts[@]}" --optimize_read_in_order=1 -q "EXPLAIN PLAN actions = 1 $1") echo "$out" | grep "Prefix sort description:" echo "$out" | grep "Result sort description:" } -$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic" -$CLICKHOUSE_CLIENT -q "CREATE TABLE t_order_by_monotonic (t DateTime, c1 String) ENGINE = MergeTree ORDER BY (t, c1) +$CLICKHOUSE_CLIENT "${opts[@]}" -q "DROP TABLE IF EXISTS t_order_by_monotonic" +$CLICKHOUSE_CLIENT "${opts[@]}" -q "CREATE TABLE t_order_by_monotonic (t DateTime, c1 String) ENGINE = MergeTree ORDER BY (t, c1) AS SELECT '2022-09-09 12:00:00', toString(number % 2) FROM numbers(2) UNION ALL SELECT '2022-09-09 12:00:30', toString(number % 2)|| 'x' FROM numbers(3)" -$CLICKHOUSE_CLIENT --optimize_aggregation_in_order=1 -q "SELECT count() FROM - (SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic GROUP BY s, c1)" +$CLICKHOUSE_CLIENT "${opts[@]}" --optimize_aggregation_in_order=1 -q "SELECT count() FROM + (SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic AS test GROUP BY s, c1)" -$CLICKHOUSE_CLIENT --optimize_read_in_order=1 -q "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s, c1" +$CLICKHOUSE_CLIENT "${opts[@]}" --optimize_read_in_order=1 -q "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic AS test ORDER BY s, c1" -explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s, c1" -explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s" +explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic AS test ORDER BY s, c1" +explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic AS test ORDER BY s" -$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic" +$CLICKHOUSE_CLIENT "${opts[@]}" -q "DROP TABLE IF EXISTS t_order_by_monotonic" -$CLICKHOUSE_CLIENT -q "CREATE TABLE t_order_by_monotonic (a Int64, b Int64) ENGINE = MergeTree ORDER BY (a, b)" +$CLICKHOUSE_CLIENT "${opts[@]}" -q "CREATE TABLE t_order_by_monotonic (a Int64, b Int64) ENGINE = MergeTree ORDER BY (a, b)" -$CLICKHOUSE_CLIENT -q 
"INSERT INTO t_order_by_monotonic VALUES (1, 1) (1, 2), (2, 1) (2, 2)" +$CLICKHOUSE_CLIENT "${opts[@]}" -q "INSERT INTO t_order_by_monotonic VALUES (1, 1) (1, 2), (2, 1) (2, 2)" -explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a" -explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, -b" -explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY a DESC, -b" -explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, b DESC" -explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, b" -explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY a, -b" +explain_sort_description "SELECT * FROM t_order_by_monotonic AS test ORDER BY -a" +explain_sort_description "SELECT * FROM t_order_by_monotonic AS test ORDER BY -a, -b" +explain_sort_description "SELECT * FROM t_order_by_monotonic AS test ORDER BY a DESC, -b" +explain_sort_description "SELECT * FROM t_order_by_monotonic AS test ORDER BY -a, b DESC" +explain_sort_description "SELECT * FROM t_order_by_monotonic AS test ORDER BY -a, b" +explain_sort_description "SELECT * FROM t_order_by_monotonic AS test ORDER BY a, -b" -$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic" +$CLICKHOUSE_CLIENT "${opts[@]}" -q "DROP TABLE IF EXISTS t_order_by_monotonic" diff --git a/tests/queries/0_stateless/02480_tlp_nan.reference b/tests/queries/0_stateless/02480_tlp_nan.reference index ea4aa44fa89..befd1f66564 100644 --- a/tests/queries/0_stateless/02480_tlp_nan.reference +++ b/tests/queries/0_stateless/02480_tlp_nan.reference @@ -1,10 +1,21 @@ +-- {echo} +SELECT sqrt(-1) as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; nan 0 1 0 +SELECT sqrt(-1) as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; nan 0 1 0 +SELECT -inf as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; -inf 0 1 0 +SELECT -inf as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; -inf 0 1 0 +SELECT NULL as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; \N \N \N 1 +SELECT NULL as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; \N \N \N 1 +SELECT inf as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; inf 0 1 0 +SELECT inf as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; inf 0 1 0 +SELECT nan as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; nan 0 1 0 +SELECT nan as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; nan 0 1 0 diff --git a/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.reference b/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.reference index dd677873c7c..348408a15cc 100644 --- a/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.reference +++ b/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.reference @@ -1,4 +1,5 @@ -- { echoOn } + SELECT cutURLParameter('http://bigmir.net/?a=b&c=d', []), cutURLParameter('http://bigmir.net/?a=b&c=d', ['a']), @@ -30,7 +31,7 @@ SELECT FORMAT Vertical; Row 1: ────── -cutURLParameter('http://bigmir.net/?a=b&c=d', []): http://bigmir.net/?a=b&c=d +cutURLParameter('http://bigmir.net/?a=b&c=d', array()): http://bigmir.net/?a=b&c=d cutURLParameter('http://bigmir.net/?a=b&c=d', ['a']): http://bigmir.net/?c=d cutURLParameter('http://bigmir.net/?a=b&c=d', ['a', 'c']): 
http://bigmir.net/? cutURLParameter('http://bigmir.net/?a=b&c=d', ['c']): http://bigmir.net/?a=b @@ -43,7 +44,7 @@ cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', ['c', 'g']): http: cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', ['e', 'g']): http://bigmir.net/?a=b&c=d#e cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'e']): http://bigmir.net/?a=b&c=d#test?g=h cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'g']): http://bigmir.net/?a=b&c=d#test?e=f -cutURLParameter('//bigmir.net/?a=b&c=d', []): //bigmir.net/?a=b&c=d +cutURLParameter('//bigmir.net/?a=b&c=d', array()): //bigmir.net/?a=b&c=d cutURLParameter('//bigmir.net/?a=b&c=d', ['a']): //bigmir.net/?c=d cutURLParameter('//bigmir.net/?a=b&c=d', ['a', 'c']): //bigmir.net/? cutURLParameter('//bigmir.net/?a=b&c=d#e=f', ['a', 'e']): //bigmir.net/?c=d# @@ -88,7 +89,7 @@ SELECT FORMAT Vertical; Row 1: ────── -cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), []): http://bigmir.net/?a=b&c=d +cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), array()): http://bigmir.net/?a=b&c=d cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['a']): http://bigmir.net/?c=d cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['a', 'c']): http://bigmir.net/? cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['c']): http://bigmir.net/?a=b @@ -101,7 +102,7 @@ cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), ['c', 'g']): cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), ['e', 'g']): http://bigmir.net/?a=b&c=d#e cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'e']): http://bigmir.net/?a=b&c=d#test?g=h cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'g']): http://bigmir.net/?a=b&c=d#test?e=f -cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), []): //bigmir.net/?a=b&c=d +cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), array()): //bigmir.net/?a=b&c=d cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), ['a']): //bigmir.net/?c=d cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), ['a', 'c']): //bigmir.net/? 
cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f'), ['a', 'e']): //bigmir.net/?c=d# diff --git a/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.sql b/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.sql index ea2d6ae104f..6d64d2685b7 100644 --- a/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.sql +++ b/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.sql @@ -1,4 +1,7 @@ +SET allow_experimental_analyzer = 1; + -- { echoOn } + SELECT cutURLParameter('http://bigmir.net/?a=b&c=d', []), cutURLParameter('http://bigmir.net/?a=b&c=d', ['a']), diff --git a/tests/queries/0_stateless/02494_query_cache_explain.reference b/tests/queries/0_stateless/02494_query_cache_explain.reference index ecc965ac391..690e75bca7c 100644 --- a/tests/queries/0_stateless/02494_query_cache_explain.reference +++ b/tests/queries/0_stateless/02494_query_cache_explain.reference @@ -1,9 +1,9 @@ 1 1 -Expression ((Projection + Before ORDER BY)) +Expression ((Project names + (Projection + Change column names to column identifiers))) Limit (preliminary LIMIT (without OFFSET)) ReadFromStorage (SystemNumbers) -Expression ((Projection + Before ORDER BY)) +Expression ((Project names + (Projection + Change column names to column identifiers))) Limit (preliminary LIMIT (without OFFSET)) ReadFromStorage (SystemNumbers) (Expression) diff --git a/tests/queries/0_stateless/02494_query_cache_explain.sql b/tests/queries/0_stateless/02494_query_cache_explain.sql index 67717efde13..68b7e0005f8 100644 --- a/tests/queries/0_stateless/02494_query_cache_explain.sql +++ b/tests/queries/0_stateless/02494_query_cache_explain.sql @@ -1,6 +1,7 @@ -- Tags: no-parallel -- Tag no-parallel: Messes with internal cache +SET allow_experimental_analyzer = 1; SET allow_experimental_query_cache = true; SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference index af6f07e3f95..8b4cafc3260 100644 --- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference @@ -30,20 +30,20 @@ Header: count() UInt64 Expression ((JOIN actions + Change column names to column identifiers)) Header: default.a.a1_1 UInt64 default.a.a2_4 String - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: a1 UInt64 a2 String Expression ((JOIN actions + Change column names to column identifiers)) Header: default.b.b1_0 UInt64 - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: b1 UInt64 Expression ((JOIN actions + Change column names to column identifiers)) Header: default.c.c1_2 UInt64 - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: c1 UInt64 Expression ((JOIN actions + Change column names to column identifiers)) Header: default.d.d1_3 UInt64 - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: d1 UInt64 EXPLAIN PLAN header = 1 SELECT a.a2, d.d2 FROM a JOIN b USING (k) JOIN c USING (k) JOIN d USING (k) @@ -70,21 +70,21 @@ Header: a2 String Expression (Change column names to column identifiers) Header: default.a.a2_0 String default.a.k_2 UInt64 - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: a2 String k UInt64 Expression (Change column names to column identifiers) Header: default.b.k_3 UInt64 - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: k UInt64 Expression (Change column names to column identifiers) Header: default.c.k_4 UInt64 - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: k UInt64 Expression 
(Change column names to column identifiers) Header: default.d.d2_1 String default.d.k_5 UInt64 - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: d2 String k UInt64 EXPLAIN PLAN header = 1 @@ -130,19 +130,19 @@ Header: bx String Expression ((JOIN actions + Change column names to column identifiers)) Header: default.a.a1_2 UInt64 default.a.a2_6 String - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: a1 UInt64 a2 String Expression ((JOIN actions + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) Header: b.b1_1 UInt64 b.bx_0 String - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: b1 UInt64 b2 String Expression ((JOIN actions + Change column names to column identifiers)) Header: default.c.c1_3 UInt64 default.c.c2_5 String - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: c1 UInt64 c2 String Expression ((JOIN actions + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) diff --git a/tests/queries/0_stateless/02521_cannot-find-column-in-projection.sql b/tests/queries/0_stateless/02521_cannot-find-column-in-projection.sql deleted file mode 100644 index 31602c5bae2..00000000000 --- a/tests/queries/0_stateless/02521_cannot-find-column-in-projection.sql +++ /dev/null @@ -1,3 +0,0 @@ -create table test(day Date, id UInt32) engine=MergeTree partition by day order by tuple(); -insert into test select toDate('2023-01-05') AS day, number from numbers(10); -with toUInt64(id) as id_with select day, count(id_with) from test where day >= '2023-01-01' group by day limit 1000; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK } diff --git a/tests/queries/0_stateless/02521_cannot_find_column_in_projection.reference b/tests/queries/0_stateless/02521_cannot_find_column_in_projection.reference new file mode 100644 index 00000000000..2cd767c8054 --- /dev/null +++ b/tests/queries/0_stateless/02521_cannot_find_column_in_projection.reference @@ -0,0 +1 @@ +2023-01-05 10 diff --git a/tests/queries/0_stateless/02521_cannot_find_column_in_projection.sql b/tests/queries/0_stateless/02521_cannot_find_column_in_projection.sql new file mode 100644 index 00000000000..255c6f56ab3 --- /dev/null +++ b/tests/queries/0_stateless/02521_cannot_find_column_in_projection.sql @@ -0,0 +1,7 @@ +SET allow_experimental_analyzer = 1; + +drop table if exists test; +create table test(day Date, id UInt32) engine=MergeTree partition by day order by tuple(); +insert into test select toDate('2023-01-05') AS day, number from numbers(10); +with toUInt64(id) as id_with select day, count(id_with) from test where day >= '2023-01-01' group by day limit 1000; +drop table test; diff --git a/tests/queries/0_stateless/02565_analyzer_limit_settings.reference b/tests/queries/0_stateless/02565_analyzer_limit_settings.reference index 6f23097612e..87e9f407cc8 100644 --- a/tests/queries/0_stateless/02565_analyzer_limit_settings.reference +++ b/tests/queries/0_stateless/02565_analyzer_limit_settings.reference @@ -62,7 +62,6 @@ SELECT * FROM numbers(10); SELECT * FROM numbers(10) LIMIT 3 OFFSET 2; 3 4 -5 SELECT * FROM numbers(10) LIMIT 5 OFFSET 2; 3 4 diff --git a/tests/queries/0_stateless/02567_and_consistency.reference b/tests/queries/0_stateless/02567_and_consistency.reference index bcb2b5aecfb..e0014f187a8 100644 --- a/tests/queries/0_stateless/02567_and_consistency.reference +++ b/tests/queries/0_stateless/02567_and_consistency.reference @@ -6,10 +6,8 @@ true ===== true ===== -===== 1 
===== -===== allow_experimental_analyzer true #45440 diff --git a/tests/queries/0_stateless/02567_and_consistency.sql b/tests/queries/0_stateless/02567_and_consistency.sql index f02185a1a52..8ad06bd68cb 100644 --- a/tests/queries/0_stateless/02567_and_consistency.sql +++ b/tests/queries/0_stateless/02567_and_consistency.sql @@ -42,31 +42,10 @@ SETTINGS enable_optimize_predicate_expression = 0; SELECT '====='; -SELECT toBool(sin(SUM(number))) AS x -FROM -( - SELECT 1 AS number -) -GROUP BY number -HAVING 1 AND sin(sum(number)) -SETTINGS enable_optimize_predicate_expression = 1; -- { serverError 59 } - -SELECT '====='; - SELECT 1 and sin(1); SELECT '====='; -SELECT toBool(sin(SUM(number))) AS x -FROM -( - SELECT 1 AS number -) -GROUP BY number -HAVING x AND sin(1) -SETTINGS enable_optimize_predicate_expression = 0; -- { serverError 59 } - -SELECT '====='; SELECT 'allow_experimental_analyzer'; SET allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/02567_native_type_conversions.reference b/tests/queries/0_stateless/02567_native_type_conversions.reference new file mode 100644 index 00000000000..5c223870c11 --- /dev/null +++ b/tests/queries/0_stateless/02567_native_type_conversions.reference @@ -0,0 +1,3 @@ +1 +42 +1 diff --git a/tests/queries/0_stateless/02567_native_type_conversions.sh b/tests/queries/0_stateless/02567_native_type_conversions.sh new file mode 100755 index 00000000000..976c42f07c1 --- /dev/null +++ b/tests/queries/0_stateless/02567_native_type_conversions.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select 42::UInt8 as x format Native" | $CLICKHOUSE_LOCAL --structure="x UInt64" --input-format="Native" -q "select * from table" --input_format_native_allow_types_conversion=0 2>&1 | grep "TYPE_MISMATCH" -c + +$CLICKHOUSE_LOCAL -q "select 42::UInt8 as x format Native" | $CLICKHOUSE_LOCAL --structure="x UInt64" --input-format="Native" -q "select * from table" --input_format_native_allow_types_conversion=1 + +$CLICKHOUSE_LOCAL -q "select 'Hello' as x format Native" | $CLICKHOUSE_LOCAL --structure="x UInt64" --input-format="Native" -q "select * from table" --input_format_native_allow_types_conversion=1 2>&1 | grep 'while converting column "x" from type String to type UInt64' -c + diff --git a/tests/queries/0_stateless/02572_materialized_views_ignore_errors.reference b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.reference new file mode 100644 index 00000000000..fc2e6b78122 --- /dev/null +++ b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.reference @@ -0,0 +1,28 @@ +-- { echoOn } +select * from data_02572 order by key; +insert into data_02572 settings materialized_views_ignore_errors=1 values (2); +select * from data_02572 order by key; +2 +-- check system.query_views_log +system flush logs; +-- lower(status) to pass through clickhouse-test "exception" check +select lower(status::String), errorCodeToName(exception_code) +from system.query_views_log where + view_name = concatWithSeparator('.', currentDatabase(), 'push_to_proxy_mv_02572') and + view_target = concatWithSeparator('.', currentDatabase(), 'proxy_02572') + order by event_date, event_time +; +exceptionwhileprocessing UNKNOWN_TABLE +-- materialized_views_ignore_errors=0 +insert into data_02572 values (1); -- { serverError UNKNOWN_TABLE } +select * from data_02572 order by key; +1 +2 +create table receiver_02572 as 
data_02572; +insert into data_02572 values (3); +select * from data_02572 order by key; +1 +2 +3 +select * from receiver_02572 order by key; +3 diff --git a/tests/queries/0_stateless/02572_materialized_views_ignore_errors.sql b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.sql new file mode 100644 index 00000000000..2d1f824b9b1 --- /dev/null +++ b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.sql @@ -0,0 +1,40 @@ +set prefer_localhost_replica=1; + +drop table if exists data_02572; +drop table if exists proxy_02572; +drop table if exists push_to_proxy_mv_02572; +drop table if exists receiver_02572; + +create table data_02572 (key Int) engine=Memory(); + +create table proxy_02572 (key Int) engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572'); +-- ensure that insert fails +insert into proxy_02572 values (1); -- { serverError UNKNOWN_TABLE } + +-- proxy data with MV +create materialized view push_to_proxy_mv_02572 to proxy_02572 as select * from data_02572; + +-- { echoOn } +select * from data_02572 order by key; + +insert into data_02572 settings materialized_views_ignore_errors=1 values (2); +select * from data_02572 order by key; +-- check system.query_views_log +system flush logs; +-- lower(status) to pass through clickhouse-test "exception" check +select lower(status::String), errorCodeToName(exception_code) +from system.query_views_log where + view_name = concatWithSeparator('.', currentDatabase(), 'push_to_proxy_mv_02572') and + view_target = concatWithSeparator('.', currentDatabase(), 'proxy_02572') + order by event_date, event_time +; + +-- materialized_views_ignore_errors=0 +insert into data_02572 values (1); -- { serverError UNKNOWN_TABLE } +select * from data_02572 order by key; + +create table receiver_02572 as data_02572; + +insert into data_02572 values (3); +select * from data_02572 order by key; +select * from receiver_02572 order by key; diff --git a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference new file mode 100644 index 00000000000..d5446e756a3 --- /dev/null +++ b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference @@ -0,0 +1,2 @@ +10 querystart OK +10 queryfinish OK diff --git a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql new file mode 100644 index 00000000000..9568bc7af1a --- /dev/null +++ b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql @@ -0,0 +1,26 @@ +-- Tags: no-parallel, no-replicated-database +-- Tag no-parallel: due to attaching to system.query_log +-- Tag no-replicated-database: Replicated database will have extra queries + +-- Attach MV to system.query_log and check that writing query_log will not fail + +set log_queries=1; + +drop table if exists log_proxy_02572; +drop table if exists push_to_logs_proxy_mv_02572; + +create table log_proxy_02572 as system.query_log engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572'); +create materialized view push_to_logs_proxy_mv_02572 to log_proxy_02572 as select * from system.query_log; + +select 1 format Null; +system flush logs; +system flush logs; + +drop table log_proxy_02572; +drop table push_to_logs_proxy_mv_02572; + +system flush logs; +-- lower() to pass through clickhouse-test "exception" check +select count(), 
lower(type::String), errorCodeToName(exception_code) + from system.query_log + where current_database = currentDatabase() group by 2, 3; diff --git a/tests/queries/0_stateless/02579_fill_empty_chunk.sql b/tests/queries/0_stateless/02579_fill_empty_chunk.sql index 14ae322d8c9..cbdbd7a9f84 100644 --- a/tests/queries/0_stateless/02579_fill_empty_chunk.sql +++ b/tests/queries/0_stateless/02579_fill_empty_chunk.sql @@ -1,5 +1,7 @@ -- this SELECT produces empty chunk in FillingTransform +SET enable_positional_arguments = 0; + SELECT 2 AS x, arrayJoin([NULL, NULL, NULL]) diff --git a/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.reference b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.reference new file mode 100644 index 00000000000..492b12dba56 --- /dev/null +++ b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.reference @@ -0,0 +1,14 @@ +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 diff --git a/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh new file mode 100755 index 00000000000..89b5147f026 --- /dev/null +++ b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='lz4'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='snappy'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='zstd'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='brotli'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='gzip'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" + +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='lz4'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='zstd'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='zlib'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='snappy'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" + + +$CLICKHOUSE_LOCAL -q 
"select * from numbers(10) format Arrow settings output_format_arrow_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=Arrow -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Arrow settings output_format_arrow_compression_method='lz4_frame'" | $CLICKHOUSE_LOCAL --input-format=Arrow -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Arrow settings output_format_arrow_compression_method='zstd'" | $CLICKHOUSE_LOCAL --input-format=Arrow -q "select count() from table" + diff --git a/tests/queries/0_stateless/02587_csv_big_numbers_inference.reference b/tests/queries/0_stateless/02587_csv_big_numbers_inference.reference new file mode 100644 index 00000000000..5b38606d1fd --- /dev/null +++ b/tests/queries/0_stateless/02587_csv_big_numbers_inference.reference @@ -0,0 +1,4 @@ +c1 Nullable(Float64) +100000000000000000000 +c1 Nullable(Float64) +-100000000000000000000 diff --git a/tests/queries/0_stateless/02587_csv_big_numbers_inference.sql b/tests/queries/0_stateless/02587_csv_big_numbers_inference.sql new file mode 100644 index 00000000000..45a93034524 --- /dev/null +++ b/tests/queries/0_stateless/02587_csv_big_numbers_inference.sql @@ -0,0 +1,5 @@ +desc format('CSV', '100000000000000000000'); +select * from format('CSV', '100000000000000000000'); +desc format('CSV', '-100000000000000000000'); +select * from format('CSV', '-100000000000000000000'); + diff --git a/tests/queries/0_stateless/02588_parquet_bug.reference b/tests/queries/0_stateless/02588_parquet_bug.reference new file mode 100644 index 00000000000..44de58ae5c3 --- /dev/null +++ b/tests/queries/0_stateless/02588_parquet_bug.reference @@ -0,0 +1,3 @@ +cta 224.0.90.10 1670964058771367936 64066044 NYSE cqs_pillar quote \N \N \N 82.92 1 R 82.97 2 R +1670964058771367936 +1670946478544048640 ARCA cqs_pillar diff --git a/tests/queries/0_stateless/02588_parquet_bug.sh b/tests/queries/0_stateless/02588_parquet_bug.sh new file mode 100755 index 00000000000..f7e4ecf5e4c --- /dev/null +++ b/tests/queries/0_stateless/02588_parquet_bug.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_parquet/02588_data.parquet') where exchange_ts = 1670964058771367936" +$CLICKHOUSE_LOCAL -q "select exchange_ts from file('$CURDIR/data_parquet/02588_data.parquet') where exchange_ts = 1670964058771367936" +$CLICKHOUSE_LOCAL -q "select exchange_ts, market, product from file('$CURDIR/data_parquet/02588_data.parquet') where exchange_ts = 1670946478544048640" + diff --git a/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.sql b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.sql index 78cb423216b..73baad11634 100644 --- a/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.sql +++ b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.sql @@ -1,4 +1,5 @@ SET allow_experimental_analyzer = 1; +SET single_join_prefer_left_table = 0; DROP TABLE IF EXISTS test_table; CREATE TABLE test_table diff --git a/tests/queries/0_stateless/02675_sparse_columns_clear_column.reference b/tests/queries/0_stateless/02675_sparse_columns_clear_column.reference new file mode 100644 index 00000000000..56fa4a9ebea --- /dev/null +++ b/tests/queries/0_stateless/02675_sparse_columns_clear_column.reference @@ -0,0 +1,6 @@ +arr Default +v Sparse +arr Default +arr Default +v Sparse +0 [] diff --git a/tests/queries/0_stateless/02675_sparse_columns_clear_column.sql b/tests/queries/0_stateless/02675_sparse_columns_clear_column.sql new file mode 100644 index 00000000000..781030ef7b4 --- /dev/null +++ b/tests/queries/0_stateless/02675_sparse_columns_clear_column.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS t_sparse_columns_clear; + +CREATE TABLE t_sparse_columns_clear (arr Array(UInt64), v UInt64) +ENGINE = MergeTree ORDER BY tuple() +SETTINGS + ratio_of_defaults_for_sparse_serialization = 0.9, + min_bytes_for_wide_part=0; + +INSERT INTO t_sparse_columns_clear SELECT [number], 0 FROM numbers(1000); + +SELECT column, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_columns_clear' AND active +ORDER BY column; + +SET mutations_sync = 2; +SET alter_sync = 2; + +ALTER TABLE t_sparse_columns_clear CLEAR COLUMN v; + +SELECT column, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_columns_clear' AND active +ORDER BY column; + +OPTIMIZE TABLE t_sparse_columns_clear FINAL; + +SELECT column, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_columns_clear' AND active +ORDER BY column; + +DROP TABLE t_sparse_columns_clear SYNC; + +SYSTEM FLUSH LOGS; + +SELECT count(), groupArray(message) FROM system.text_log WHERE logger_name LIKE '%' || currentDatabase() || '.t_sparse_columns_clear' || '%' AND level = 'Error'; diff --git a/tests/queries/0_stateless/02676_analyzer_limit_offset.reference b/tests/queries/0_stateless/02676_analyzer_limit_offset.reference new file mode 100644 index 00000000000..96483268d43 --- /dev/null +++ b/tests/queries/0_stateless/02676_analyzer_limit_offset.reference @@ -0,0 +1,63 @@ +0 +1 +2 +3 +4 +15 +15 +16 +16 +17 +30 +30 +31 +31 +32 +102 +103 +104 +105 +105 +106 +107 +108 +109 +105 +106 +107 +108 +109 +60 +60 +61 +61 +62 +62 +63 +63 +64 +64 +60 +35 +35 +36 +36 +37 +37 +38 +38 +39 +39 +105 +106 +107 +108 +109 +12 +13 +13 +14 +14 +15 +15 +16 diff --git a/tests/queries/0_stateless/02676_analyzer_limit_offset.sql b/tests/queries/0_stateless/02676_analyzer_limit_offset.sql new file mode 100644 index 
00000000000..39c6b85f088 --- /dev/null +++ b/tests/queries/0_stateless/02676_analyzer_limit_offset.sql @@ -0,0 +1,34 @@ +set allow_experimental_analyzer=1; + +DROP TABLE IF EXISTS test; +CREATE TABLE test (i UInt64) Engine = MergeTree() order by i; +INSERT INTO test SELECT number FROM numbers(100); +INSERT INTO test SELECT number FROM numbers(10,100); +OPTIMIZE TABLE test FINAL; + +-- Only set limit +SET limit = 5; +SELECT * FROM test; -- 5 rows +SELECT * FROM test OFFSET 20; -- 5 rows +SELECT * FROM (SELECT i FROM test LIMIT 10 OFFSET 50) TMP; -- 5 rows +SELECT * FROM test LIMIT 4 OFFSET 192; -- 4 rows +SELECT * FROM test LIMIT 10 OFFSET 195; -- 5 rows + +-- Only set offset +SET limit = 0; +SET offset = 195; +SELECT * FROM test; -- 5 rows +SELECT * FROM test OFFSET 20; -- no result +SELECT * FROM test LIMIT 100; -- no result +SET offset = 10; +SELECT * FROM test LIMIT 20 OFFSET 100; -- 10 rows +SELECT * FROM test LIMIT 11 OFFSET 100; -- 1 rows + +-- offset and limit together +SET limit = 10; +SELECT * FROM test LIMIT 50 OFFSET 50; -- 10 rows +SELECT * FROM test LIMIT 50 OFFSET 190; -- 0 rows +SELECT * FROM test LIMIT 50 OFFSET 185; -- 5 rows +SELECT * FROM test LIMIT 18 OFFSET 5; -- 8 rows + +DROP TABLE test; diff --git a/tests/queries/0_stateless/02677_decode_url_component.reference b/tests/queries/0_stateless/02677_decode_url_component.reference new file mode 100644 index 00000000000..5f88856dc1c --- /dev/null +++ b/tests/queries/0_stateless/02677_decode_url_component.reference @@ -0,0 +1,2 @@ +%D0%BA%D0%BB%D0%B8%D0%BA%D1%85%D0%B0%D1%83%D1%81 1 +1 diff --git a/tests/queries/0_stateless/02677_decode_url_component.sql b/tests/queries/0_stateless/02677_decode_url_component.sql new file mode 100644 index 00000000000..68345b5de16 --- /dev/null +++ b/tests/queries/0_stateless/02677_decode_url_component.sql @@ -0,0 +1,5 @@ +SELECT + encodeURLComponent('кликхаус') AS encoded, + decodeURLComponent(encoded) = 'кликхаус' AS expected_EQ; + +SELECT DISTINCT decodeURLComponent(encodeURLComponent(randomString(100) AS x)) = x FROM numbers(100000); diff --git a/tests/queries/0_stateless/25340_grace_hash_limit_race.reference b/tests/queries/0_stateless/02677_grace_hash_limit_race.reference similarity index 100% rename from tests/queries/0_stateless/25340_grace_hash_limit_race.reference rename to tests/queries/0_stateless/02677_grace_hash_limit_race.reference diff --git a/tests/queries/0_stateless/25340_grace_hash_limit_race.sql b/tests/queries/0_stateless/02677_grace_hash_limit_race.sql similarity index 100% rename from tests/queries/0_stateless/25340_grace_hash_limit_race.sql rename to tests/queries/0_stateless/02677_grace_hash_limit_race.sql diff --git a/tests/integration/test_async_insert_memory/__init__.py b/tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.reference similarity index 100% rename from tests/integration/test_async_insert_memory/__init__.py rename to tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.reference diff --git a/tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.sql b/tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.sql new file mode 100644 index 00000000000..e8b7405d602 --- /dev/null +++ b/tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1(ID UInt64, name String) engine=MergeTree order by ID; + +insert into t1(ID, name) values (1, 'abc'), (2, 'bbb'); + +-- The returned node order is uncertain +explain pipeline 
graph=1 select count(ID) from t1 FORMAT Null; +explain pipeline graph=1 select sum(1) from t1 FORMAT Null; +explain pipeline graph=1 select min(ID) from t1 FORMAT Null; +explain pipeline graph=1 select max(ID) from t1 FORMAT Null; + +DROP TABLE t1; diff --git a/tests/queries/0_stateless/02521_cannot-find-column-in-projection.reference b/tests/queries/0_stateless/02679_query_parameters_dangling_pointer.reference similarity index 100% rename from tests/queries/0_stateless/02521_cannot-find-column-in-projection.reference rename to tests/queries/0_stateless/02679_query_parameters_dangling_pointer.reference diff --git a/tests/queries/0_stateless/02679_query_parameters_dangling_pointer.sql b/tests/queries/0_stateless/02679_query_parameters_dangling_pointer.sql new file mode 100644 index 00000000000..7705b860e8e --- /dev/null +++ b/tests/queries/0_stateless/02679_query_parameters_dangling_pointer.sql @@ -0,0 +1,4 @@ +-- There is no use-after-free in the following query: + +SET param_o = 'a'; +CREATE TABLE test.xxx (a Int64) ENGINE=MergeTree ORDER BY ({o:String}); -- { serverError 44 } diff --git a/tests/queries/0_stateless/data_parquet/02588_data.parquet b/tests/queries/0_stateless/data_parquet/02588_data.parquet new file mode 100644 index 00000000000..e00b869233d Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/02588_data.parquet differ diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 1ad7432a5bf..f1bba4dc2fc 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 484 +personal_ws-1.1 en 543 AArch ACLs AMQP @@ -25,6 +25,7 @@ CentOS ClickHouse ClickHouse's CodeBlock +CodeLLDB Config ConnectionDetails Contrib @@ -56,9 +57,8 @@ Hostname IPv IntN Integrations -invariants -JSONAsString JSONAsObject +JSONAsString JSONColumns JSONColumnsWithMetadata JSONCompact @@ -82,6 +82,7 @@ Jemalloc Jepsen KDevelop LGPL +LLDB LOCALTIME LOCALTIMESTAMP LibFuzzer @@ -104,10 +105,11 @@ NULLIF NVME NYPD NuRaft -ObjectId -Ok OLAP OLTP +ObjectId +Ok +OpenSSL OpenSUSE OpenStack OpenTelemetry @@ -128,10 +130,10 @@ PrettySpaceNoEscapes PrettySpaceNoEscapesMonoBlock Protobuf ProtobufSingle +QEMU QTCreator QueryCacheHits QueryCacheMisses -QEMU RBAC RawBLOB RedHat @@ -178,14 +180,14 @@ Valgrind Vectorized VirtualBox Werror +WithNamesAndTypes Woboq WriteBuffer WriteBuffers -WithNamesAndTypes XCode YAML -Yasm YYYY +Yasm Zipkin ZooKeeper ZooKeeper's @@ -200,6 +202,7 @@ autostart avro avx aws +backend backoff backticks benchmarking @@ -309,6 +312,7 @@ instantiation integrational integrations interserver +invariants jdbc jemalloc json @@ -333,8 +337,8 @@ jsonstringseachrowwithprogress kafka kafkacat konsole -latencies laion +latencies lexicographically libFuzzer libc @@ -373,9 +377,10 @@ mutex mysql mysqldump mysqljs +natively noop -nullable nullability +nullable num obfuscator odbc @@ -532,6 +537,7 @@ xcode xml xz zLib +zLinux zkcopy zlib znodes diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 34bc3f646fc..c2d9781177d 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,10 +1,13 @@ +v23.2.4.12-stable 2023-03-10 v23.2.3.17-stable 2023-03-06 v23.2.2.20-stable 2023-03-01 v23.2.1.2537-stable 2023-02-23 +v23.1.5.24-stable 2023-03-10 v23.1.4.58-stable 2023-03-01 v23.1.3.5-stable 2023-02-03 v23.1.2.9-stable 2023-01-29 v23.1.1.3077-stable 2023-01-25 
+v22.12.5.34-stable 2023-03-10 v22.12.4.76-stable 2023-03-01 v22.12.3.5-stable 2023-01-10 v22.12.2.25-stable 2023-01-06 @@ -29,6 +32,7 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.15.23-lts 2023-03-10 v22.8.14.53-lts 2023-02-27 v22.8.13.20-lts 2023-01-29 v22.8.12.45-lts 2023-01-10 diff --git a/utils/self-extracting-executable/decompressor.cpp b/utils/self-extracting-executable/decompressor.cpp index 37fbd043814..d41b9b1ebe1 100644 --- a/utils/self-extracting-executable/decompressor.cpp +++ b/utils/self-extracting-executable/decompressor.cpp @@ -168,6 +168,10 @@ int decompress(char * input, char * output, off_t start, off_t end, size_t max_n return 0; } +bool isSudo() +{ + return geteuid() == 0; +} /// Read data about files and decompress them. int decompressFiles(int input_fd, char * path, char * name, bool & have_compressed_analoge, bool & has_exec, char * decompressed_suffix, uint64_t * decompressed_umask) @@ -220,6 +224,8 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress return 1; } + bool is_sudo = isSudo(); + FileData file_info; /// Decompress files with appropriate file names for (size_t i = 0; i < le64toh(metadata.number_of_files); ++i) @@ -319,6 +325,9 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress perror("fsync"); if (0 != close(output_fd)) perror("close"); + + if (is_sudo) + chown(file_name, info_in.st_uid, info_in.st_gid); } if (0 != munmap(input, info_in.st_size)) @@ -414,6 +423,13 @@ int main(int/* argc*/, char* argv[]) else name = file_path; + struct stat input_info; + if (0 != stat(self, &input_info)) + { + perror("stat"); + return 1; + } + #if !defined(OS_DARWIN) && !defined(OS_FREEBSD) /// get inode of this executable uint64_t inode = getInode(self); @@ -441,13 +457,6 @@ int main(int/* argc*/, char* argv[]) return 1; } - struct stat input_info; - if (0 != stat(self, &input_info)) - { - perror("stat"); - return 1; - } - /// inconsistency in WSL1 Ubuntu - inode reported in /proc/self/maps is a 64bit to /// 32bit conversion of input_info.st_ino if (input_info.st_ino & 0xFFFFFFFF00000000 && !(inode & 0xFFFFFFFF00000000)) @@ -532,6 +541,9 @@ int main(int/* argc*/, char* argv[]) return 1; } + if (isSudo()) + chown(static_cast(self), input_info.st_uid, input_info.st_gid); + if (has_exec) { #if !defined(OS_DARWIN) && !defined(OS_FREEBSD)