diff --git a/CHANGELOG.md b/CHANGELOG.md index d60b53a1f22..ddba799dbd0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,7 +61,6 @@ * `SET param_x` query no longer requires manual string serialization for the value of the parameter. For example, query `SET param_a = '[\'a\', \'b\']'` can now be written like `SET param_a = ['a', 'b']`. [#41874](https://github.com/ClickHouse/ClickHouse/pull/41874) ([Nikolay Degterinsky](https://github.com/evillique)). * Show read rows in the progress indication while reading from STDIN from client. Closes [#43423](https://github.com/ClickHouse/ClickHouse/issues/43423). [#43442](https://github.com/ClickHouse/ClickHouse/pull/43442) ([Kseniia Sumarokova](https://github.com/kssenii)). * Show progress bar while reading from s3 table function / engine. [#43454](https://github.com/ClickHouse/ClickHouse/pull/43454) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Progress bar will show both read and written rows. [#43496](https://github.com/ClickHouse/ClickHouse/pull/43496) ([Ilya Yatsishin](https://github.com/qoega)). * `filesystemAvailable` and related functions support one optional argument with disk name, and change `filesystemFree` to `filesystemUnreserved`. Closes [#35076](https://github.com/ClickHouse/ClickHouse/issues/35076). [#42064](https://github.com/ClickHouse/ClickHouse/pull/42064) ([flynn](https://github.com/ucasfl)). * Integration with LDAP: increased the default value of search_limit to 256, and added LDAP server config option to change that to an arbitrary value. Closes: [#42276](https://github.com/ClickHouse/ClickHouse/issues/42276). [#42461](https://github.com/ClickHouse/ClickHouse/pull/42461) ([Vasily Nemkov](https://github.com/Enmk)). * Allow the removal of sensitive information (see the `query_masking_rules` in the configuration file) from the exception messages as well. Resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#42940](https://github.com/ClickHouse/ClickHouse/pull/42940) ([filimonov](https://github.com/filimonov)). diff --git a/SECURITY.md b/SECURITY.md index a4f431d7552..3dcdc5db009 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 22.12 | ✔️ | | 22.11 | ✔️ | | 22.10 | ✔️ | -| 22.9 | ✔️ | +| 22.9 | ❌ | | 22.8 | ✔️ | | 22.7 | ❌ | | 22.6 | ❌ | diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index d06d3918612..87b11c46f45 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54469) +SET(VERSION_REVISION 54470) SET(VERSION_MAJOR 22) -SET(VERSION_MINOR 12) +SET(VERSION_MINOR 13) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 0d211ed19849fe44b0e43fdebe2c15d76d560a77) -SET(VERSION_DESCRIBE v22.12.1.1-testing) -SET(VERSION_STRING 22.12.1.1) +SET(VERSION_GITHASH 688e488e930c83eefeac4f87c4cc029cc5b231e3) +SET(VERSION_DESCRIBE v22.13.1.1-testing) +SET(VERSION_STRING 22.13.1.1) # end of autochange diff --git a/cmake/darwin/toolchain-aarch64.cmake b/cmake/darwin/toolchain-aarch64.cmake index 81398111495..569b02bb642 100644 --- a/cmake/darwin/toolchain-aarch64.cmake +++ b/cmake/darwin/toolchain-aarch64.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. 
+include_guard(GLOBAL) + set (CMAKE_SYSTEM_NAME "Darwin") set (CMAKE_SYSTEM_PROCESSOR "aarch64") set (CMAKE_C_COMPILER_TARGET "aarch64-apple-darwin") diff --git a/cmake/darwin/toolchain-x86_64.cmake b/cmake/darwin/toolchain-x86_64.cmake index 0be81dfa753..c4527d2fc0d 100644 --- a/cmake/darwin/toolchain-x86_64.cmake +++ b/cmake/darwin/toolchain-x86_64.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_SYSTEM_NAME "Darwin") set (CMAKE_SYSTEM_PROCESSOR "x86_64") set (CMAKE_C_COMPILER_TARGET "x86_64-apple-darwin") diff --git a/cmake/freebsd/toolchain-aarch64.cmake b/cmake/freebsd/toolchain-aarch64.cmake index eeec635cc06..8a8da00f3be 100644 --- a/cmake/freebsd/toolchain-aarch64.cmake +++ b/cmake/freebsd/toolchain-aarch64.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_SYSTEM_NAME "FreeBSD") set (CMAKE_SYSTEM_PROCESSOR "aarch64") set (CMAKE_C_COMPILER_TARGET "aarch64-unknown-freebsd12") diff --git a/cmake/freebsd/toolchain-ppc64le.cmake b/cmake/freebsd/toolchain-ppc64le.cmake index d6007befb67..c3f6594204d 100644 --- a/cmake/freebsd/toolchain-ppc64le.cmake +++ b/cmake/freebsd/toolchain-ppc64le.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_SYSTEM_NAME "FreeBSD") set (CMAKE_SYSTEM_PROCESSOR "ppc64le") set (CMAKE_C_COMPILER_TARGET "powerpc64le-unknown-freebsd13") diff --git a/cmake/freebsd/toolchain-x86_64.cmake b/cmake/freebsd/toolchain-x86_64.cmake index 80cbeba549f..460de6a7d39 100644 --- a/cmake/freebsd/toolchain-x86_64.cmake +++ b/cmake/freebsd/toolchain-x86_64.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_SYSTEM_NAME "FreeBSD") set (CMAKE_SYSTEM_PROCESSOR "x86_64") set (CMAKE_C_COMPILER_TARGET "x86_64-pc-freebsd11") diff --git a/cmake/linux/toolchain-aarch64.cmake b/cmake/linux/toolchain-aarch64.cmake index 5db71aecf9a..2dedef8859f 100644 --- a/cmake/linux/toolchain-aarch64.cmake +++ b/cmake/linux/toolchain-aarch64.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) set (CMAKE_SYSTEM_NAME "Linux") diff --git a/cmake/linux/toolchain-ppc64le.cmake b/cmake/linux/toolchain-ppc64le.cmake index 345de208234..8eb2aab34e9 100644 --- a/cmake/linux/toolchain-ppc64le.cmake +++ b/cmake/linux/toolchain-ppc64le.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) set (CMAKE_SYSTEM_NAME "Linux") diff --git a/cmake/linux/toolchain-riscv64.cmake b/cmake/linux/toolchain-riscv64.cmake index 02c3d0c97fc..49a036c2972 100644 --- a/cmake/linux/toolchain-riscv64.cmake +++ b/cmake/linux/toolchain-riscv64.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. 
+include_guard(GLOBAL) + set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) set (CMAKE_SYSTEM_NAME "Linux") diff --git a/cmake/linux/toolchain-x86_64-musl.cmake b/cmake/linux/toolchain-x86_64-musl.cmake index 3eb2077db2b..bc327e5ac25 100644 --- a/cmake/linux/toolchain-x86_64-musl.cmake +++ b/cmake/linux/toolchain-x86_64-musl.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) set (CMAKE_SYSTEM_NAME "Linux") diff --git a/cmake/linux/toolchain-x86_64.cmake b/cmake/linux/toolchain-x86_64.cmake index bdcfcfa013a..e73d779284a 100644 --- a/cmake/linux/toolchain-x86_64.cmake +++ b/cmake/linux/toolchain-x86_64.cmake @@ -1,18 +1,15 @@ -if (_CLICKHOUSE_TOOLCHAIN_FILE_LOADED) - # During first run of cmake the toolchain file will be loaded twice, - # - /usr/share/cmake-3.23/Modules/CMakeDetermineSystem.cmake - # - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake - # - # But once you already have non-empty cmake cache it will be loaded only - # once: - # - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake - # - # This has no harm except for double load of toolchain will add - # --gcc-toolchain multiple times that will not allow ccache to reuse the - # cache. - return() -endif() -set (_CLICKHOUSE_TOOLCHAIN_FILE_LOADED ON) +# During first run of cmake the toolchain file will be loaded twice, +# - /usr/share/cmake-3.23/Modules/CMakeDetermineSystem.cmake +# - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake +# +# But once you already have non-empty cmake cache it will be loaded only +# once: +# - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake +# +# This has no harm except for double load of toolchain will add +# --gcc-toolchain multiple times that will not allow ccache to reuse the +# cache. +include_guard(GLOBAL) set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 305fc279414..22d6282d71c 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="22.11.2.30" +ARG VERSION="22.12.1.1752" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index f1c4dd097aa..3135ec508de 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="22.11.2.30" +ARG VERSION="22.12.1.1752" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 136428c8ddd..01e0f5b4897 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -127,23 +127,24 @@ EOL function stop() { - local max_tries="" - if [ -n "$1" ] - then - max_tries="--max-tries $1" - fi - local pid # Preserve the pid, since the server can hung after the PID will be deleted. 
pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)" clickhouse stop $max_tries --do-not-kill && return + if [ -n "$1" ] + then + # temporarily disable it in BC check + clickhouse stop --force + return + fi + # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. kill -TERM "$(pidof gdb)" ||: sleep 5 echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log - gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log + timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log clickhouse stop --force } @@ -431,7 +432,7 @@ else clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" - stop 180 + stop 1 mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log # Start new server diff --git a/docs/changelogs/v22.12.1.1752-stable.md b/docs/changelogs/v22.12.1.1752-stable.md new file mode 100644 index 00000000000..9b3d2379277 --- /dev/null +++ b/docs/changelogs/v22.12.1.1752-stable.md @@ -0,0 +1,320 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.12.1.1752-stable (688e488e930) FIXME as compared to v22.11.1.1360-stable (0d211ed1984) + +#### Backward Incompatible Change +* Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/41431 and affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Raúl Marín](https://github.com/Algunenano)). + +#### New Feature +* Add "grace_hash" join_algorithm. [#38191](https://github.com/ClickHouse/ClickHouse/pull/38191) ([BigRedEye](https://github.com/BigRedEye)). +* Merging on initiator now uses the same memory bound approach as merging of local aggregation results if `enable_memory_bound_merging_of_aggregation_results` is set. 
[#40879](https://github.com/ClickHouse/ClickHouse/pull/40879) ([Nikita Taranov](https://github.com/nickitat)). +* Add BSONEachRow input/output format. In this format, ClickHouse formats/parses each row as a separated BSON Document and each column is formatted/parsed as a single BSON field with column name as a key. [#42033](https://github.com/ClickHouse/ClickHouse/pull/42033) ([mark-polokhov](https://github.com/mark-polokhov)). +* close: [#37631](https://github.com/ClickHouse/ClickHouse/issues/37631). [#42265](https://github.com/ClickHouse/ClickHouse/pull/42265) ([刘陶峰](https://github.com/taofengliu)). +* Added `multiplyDecimal` and `divideDecimal` functions for decimal operations with fixed precision. [#42438](https://github.com/ClickHouse/ClickHouse/pull/42438) ([Andrey Zvonov](https://github.com/zvonand)). +* Added `system.moves` table with list of currently moving parts. [#42660](https://github.com/ClickHouse/ClickHouse/pull/42660) ([Sergei Trifonov](https://github.com/serxa)). +* Keeper feature: add support for embedded Prometheus endpoint. [#43087](https://github.com/ClickHouse/ClickHouse/pull/43087) ([Antonio Andelic](https://github.com/antonio2368)). +* Added age function to calculate difference between two dates or dates with time values expressed as number of full units. Close [#41115](https://github.com/ClickHouse/ClickHouse/issues/41115). [#43123](https://github.com/ClickHouse/ClickHouse/pull/43123) ([Roman Vasin](https://github.com/rvasin)). +* Add settings `max_streams_for_merge_tree_reading` and `allow_asynchronous_read_from_io_pool_for_merge_tree`. Setting `max_streams_for_merge_tree_reading` limits the number of reading streams for MergeTree tables. Setting `allow_asynchronous_read_from_io_pool_for_merge_tree` enables background I/O pool to read from `MergeTree` tables. This may increase performance for I/O bound queries if used together with `max_streams_to_max_threads_ratio` or `max_streams_for_merge_tree_reading`. [#43260](https://github.com/ClickHouse/ClickHouse/pull/43260) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add the expression of the index on `data_skipping_indices` system table. [#43308](https://github.com/ClickHouse/ClickHouse/pull/43308) ([Guillaume Tassery](https://github.com/YiuRULE)). +* New hash function [xxh3](https://github.com/Cyan4973/xxHash) added. Also performance of `xxHash32` and `xxHash64` improved on arm thanks to library update. [#43411](https://github.com/ClickHouse/ClickHouse/pull/43411) ([Nikita Taranov](https://github.com/nickitat)). +* - Temporary data (for external sorting, aggregation, and JOINs) can share storage with the filesystem cache for remote disks and evict it, close [#42158](https://github.com/ClickHouse/ClickHouse/issues/42158). [#43457](https://github.com/ClickHouse/ClickHouse/pull/43457) ([Vladimir C](https://github.com/vdimir)). +* Add column `engine_full` to system table `databases` so that users can access whole engine definition of database via system tables. [#43468](https://github.com/ClickHouse/ClickHouse/pull/43468) ([凌涛](https://github.com/lingtaolf)). +* Add password complexity rules and checks for creating a new user. [#43719](https://github.com/ClickHouse/ClickHouse/pull/43719) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add function concatWithSeparator , like concat_ws in spark. [#43749](https://github.com/ClickHouse/ClickHouse/pull/43749) ([李扬](https://github.com/taiyang-li)). +* Added constraints for merge tree settings. 
[#43903](https://github.com/ClickHouse/ClickHouse/pull/43903) ([Sergei Trifonov](https://github.com/serxa)). +* Support numeric literals with _ as separator. [#43925](https://github.com/ClickHouse/ClickHouse/pull/43925) ([jh0x](https://github.com/jh0x)). +* Add a new setting `input_format_json_read_objects_as_strings` that allows to parse nested JSON objects into Strings in all JSON input formats. This setting is disable by default. [#44052](https://github.com/ClickHouse/ClickHouse/pull/44052) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Performance Improvement +* Optimisation is getting skipped now if `max_size_to_preallocate_for_aggregation` has too small value. Default value of this setting increased to `10^8`. [#43945](https://github.com/ClickHouse/ClickHouse/pull/43945) ([Nikita Taranov](https://github.com/nickitat)). + +#### Improvement +* Support numeric literals with underscores. closes [#28967](https://github.com/ClickHouse/ClickHouse/issues/28967). [#39129](https://github.com/ClickHouse/ClickHouse/pull/39129) ([unbyte](https://github.com/unbyte)). +* Add `FROM table SELECT column` syntax. [#41095](https://github.com/ClickHouse/ClickHouse/pull/41095) ([Nikolay Degterinsky](https://github.com/evillique)). +* This PR changes how followed queries delete parts: truncate table, alter table drop part, alter table drop partition. Now these queries make empty parts which cover old parts. This makes truncate query works without exclusive lock which means concurrent reads aren't locked. Also achieved durability in all those queries. If request is succeeded then no resurrected pars appear later. Note that atomicity is achieved only with transaction scope. [#41145](https://github.com/ClickHouse/ClickHouse/pull/41145) ([Sema Checherinda](https://github.com/CheSema)). +* `SET param_x` query no longer requires manual string serialization for the value of the parameter. For example, query `SET param_a = '[\'a\', \'b\']'` can now be written like `SET param_a = ['a', 'b']`. [#41874](https://github.com/ClickHouse/ClickHouse/pull/41874) ([Nikolay Degterinsky](https://github.com/evillique)). +* `filesystemAvailable` and related functions support one optional argument with disk name, and change `filesystemFree` to `filesystemUnreserved`. Closes [#35076](https://github.com/ClickHouse/ClickHouse/issues/35076). [#42064](https://github.com/ClickHouse/ClickHouse/pull/42064) ([flynn](https://github.com/ucasfl)). +* Increased the default value of search_limit to 256, and added LDAP server config option to change that to an arbitrary value. Closes: [#42276](https://github.com/ClickHouse/ClickHouse/issues/42276). [#42461](https://github.com/ClickHouse/ClickHouse/pull/42461) ([Vasily Nemkov](https://github.com/Enmk)). +* Add cosine distance for annoy. [#42778](https://github.com/ClickHouse/ClickHouse/pull/42778) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Allow to remove sensitive information from the exception messages also. Resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#42940](https://github.com/ClickHouse/ClickHouse/pull/42940) ([filimonov](https://github.com/filimonov)). +* Keeper improvement: Add 4lw command `rqld` which can manually assign a node as leader. [#43026](https://github.com/ClickHouse/ClickHouse/pull/43026) ([JackyWoo](https://github.com/JackyWoo)). +* Apply connection timeouts settings for Distributed async INSERT from the query. [#43156](https://github.com/ClickHouse/ClickHouse/pull/43156) ([Azat Khuzhin](https://github.com/azat)). 
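+
+A minimal, hypothetical illustration of two of the syntax improvements listed above (typed values for `SET param_...` and `_` digit separators in numeric literals); the parameter name `a` and the values are examples only:
+
+```sql
+-- Query parameter values no longer need manual string serialization:
+SET param_a = ['a', 'b'];
+SELECT {a:Array(String)};
+
+-- Numeric literals may use '_' as a digit separator:
+SELECT 1_000_000 AS million;
+```
+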
+* The `unhex` function now supports `FixedString` arguments. [issue42369](https://github.com/ClickHouse/ClickHouse/issues/42369). [#43207](https://github.com/ClickHouse/ClickHouse/pull/43207) ([DR](https://github.com/freedomDR)).
+* Priority is given to deleting completely expired parts, related to [#42869](https://github.com/ClickHouse/ClickHouse/issues/42869). [#43222](https://github.com/ClickHouse/ClickHouse/pull/43222) ([zhongyuankai](https://github.com/zhongyuankai)).
+* Follow-up to https://github.com/ClickHouse/ClickHouse/pull/42484. Mask sensitive information in logs better; mask secret parts in the output of queries `SHOW CREATE TABLE` and `SELECT FROM system.tables`. Also resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#43227](https://github.com/ClickHouse/ClickHouse/pull/43227) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Enable compression of marks and primary key. [#43288](https://github.com/ClickHouse/ClickHouse/pull/43288) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Resolve issue [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075). Previously, async insert did not support deduplication because multiple small inserts coexist in one part, which corresponds to multiple `block_id`s. The solution is straightforward: 1. mark offsets for every insert in every chunk; 2. calculate multiple `block_id`s when the sinker receives a chunk; 3. get a block number lock by these `block_id`s: 3.1. if that fails, remove the duplicate insert(s) and duplicate `block_id`(s) from the block and recalculate `offsets` again; 3.2. if it succeeds, commit the `block_id`s and other items into Keeper: a. if that fails, do 3.1; b. if it succeeds, everything succeeds. [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304) ([Han Fei](https://github.com/hanfei1991)).
+* More precise and reactive CPU load indication in the client. [#43307](https://github.com/ClickHouse/ClickHouse/pull/43307) ([Sergei Trifonov](https://github.com/serxa)).
+* Restrict default access to named collections for users defined in the config. A user must have explicit `show_named_collections=1` to be able to see them. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Support reading of subcolumns of nested types from storage `S3` and table function `s3` with formats `Parquet`, `Arrow` and `ORC`. [#43329](https://github.com/ClickHouse/ClickHouse/pull/43329) ([chen](https://github.com/xiedeyantu)).
+* Systemd integration now correctly notifies systemd that the service has really started and is ready to serve requests. [#43400](https://github.com/ClickHouse/ClickHouse/pull/43400) ([Коренберг Марк](https://github.com/socketpair)).
+* Add `table_uuid` to `system.parts`. [#43404](https://github.com/ClickHouse/ClickHouse/pull/43404) ([Azat Khuzhin](https://github.com/azat)).
+* Added a client option to display the number of locally processed rows in non-interactive mode (`--print-num-processed-rows`). [#43407](https://github.com/ClickHouse/ClickHouse/pull/43407) ([jh0x](https://github.com/jh0x)).
+* Show read rows while reading from stdin in the client. Closes [#43423](https://github.com/ClickHouse/ClickHouse/issues/43423). [#43442](https://github.com/ClickHouse/ClickHouse/pull/43442) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Keeper improvement: try syncing logs to disk in parallel with replication. [#43450](https://github.com/ClickHouse/ClickHouse/pull/43450) ([Antonio Andelic](https://github.com/antonio2368)).
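+
+For instance, assuming the new `table_uuid` column lands in `system.parts` as described above, it could be queried like this (a sketch, not taken from the PR):
+
+```sql
+SELECT table, table_uuid, name
+FROM system.parts
+WHERE active
+LIMIT 5;
+```
+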
+* Show progress bar while reading from s3 table function / engine. [#43454](https://github.com/ClickHouse/ClickHouse/pull/43454) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Progress bar will show both read and written rows. [#43496](https://github.com/ClickHouse/ClickHouse/pull/43496) ([Ilya Yatsishin](https://github.com/qoega)). +* Implement `aggregation-in-order` optimization on top of query plan. It is enabled by default (but works only together with `optimize_aggregation_in_order`, which is disabled by default). Set `query_plan_aggregation_in_order = 0` to use previous AST-based version. [#43592](https://github.com/ClickHouse/ClickHouse/pull/43592) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Allow to send profile events with `trace_type = 'ProfileEvent'` to `system.trace_log` on each increment with current stack, profile event name and value of increment. It can be enabled by setting `trace_profile_events` and used to debug performance of queries. [#43639](https://github.com/ClickHouse/ClickHouse/pull/43639) ([Anton Popov](https://github.com/CurtizJ)). +* Keeper improvement: requests are batched more often. The batching can be controlled with the new setting `max_requests_quick_batch_size`. [#43686](https://github.com/ClickHouse/ClickHouse/pull/43686) ([Antonio Andelic](https://github.com/antonio2368)). +* Added possibility to use array as a second parameter for cutURLParameter function. Close [#6827](https://github.com/ClickHouse/ClickHouse/issues/6827). [#43788](https://github.com/ClickHouse/ClickHouse/pull/43788) ([Roman Vasin](https://github.com/rvasin)). +* Implement referential dependencies and use them to create tables in the correct order while restoring from a backup. [#43834](https://github.com/ClickHouse/ClickHouse/pull/43834) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add a new setting `input_format_max_binary_string_size` to limit string size in RowBinary format. [#43842](https://github.com/ClickHouse/ClickHouse/pull/43842) ([Kruglov Pavel](https://github.com/Avogar)). +* - Fix some incorrect logic in ast level optimization related. [#43873](https://github.com/ClickHouse/ClickHouse/pull/43873) ([Duc Canh Le](https://github.com/canhld94)). +* Support query like `SHOW FULL TABLES ...`. [#43910](https://github.com/ClickHouse/ClickHouse/pull/43910) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* When ClickHouse requests a remote HTTP server, and it returns an error, the numeric HTTP code was not displayed correctly in the exception message. Closes [#43919](https://github.com/ClickHouse/ClickHouse/issues/43919). [#43920](https://github.com/ClickHouse/ClickHouse/pull/43920) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem/merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem` did not respect adaptive granularity. Fat rows did not decrease the number of read rows (as it is was done for `merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read`, which could lead to high memory usage. [#43965](https://github.com/ClickHouse/ClickHouse/pull/43965) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Support `optimize_if_transform_strings_to_enum` in new analyzer. [#43999](https://github.com/ClickHouse/ClickHouse/pull/43999) ([Antonio Andelic](https://github.com/antonio2368)). 
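+
+A hedged sketch of the `cutURLParameter` change mentioned above (the URL and the exact output are illustrative assumptions, not taken from the PR):
+
+```sql
+-- Passing an array removes several URL parameters at once:
+SELECT cutURLParameter('http://example.com/?a=1&b=2&c=3', ['a', 'c']);
+-- expected: http://example.com/?b=2
+```
+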
+* Upgrade the new "DeflateQpl" compression codec, which was implemented in a previous PR (details: https://github.com/ClickHouse/ClickHouse/pull/39494). This patch improves the codec in the following aspects: 1. upgrade from QPL v0.2.0 to QPL v0.3.0 [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl); 2. improve the CMake file to fix QPL build issues for QPL v0.3.0; 3. link the QPL library with libaccel-config at build time instead of loading it at runtime via dlopen as in QPL v0.2.0; 4. fix a log printing issue in CompressionCodecDeflateQpl.cpp. [#44024](https://github.com/ClickHouse/ClickHouse/pull/44024) ([jasperzhu](https://github.com/jinjunzh)).
+* Follow-up to https://github.com/ClickHouse/ClickHouse/pull/43834. Fix review issues; dependencies from the `Distributed` table engine and from the `cluster()` function are now also considered, as well as dependencies of a dictionary defined without host & port specified. [#44158](https://github.com/ClickHouse/ClickHouse/pull/44158) ([Vitaly Baranov](https://github.com/vitlibar)).
+
+#### Bug Fix
+* Fix mutations not making progress when checksums do not match between replicas (e.g. caused by a change in data format on an upgrade). [#36877](https://github.com/ClickHouse/ClickHouse/pull/36877) ([nvartolomei](https://github.com/nvartolomei)).
+* Fix `skip_unavailable_shards` not working with the `hdfsCluster` table function. [#43236](https://github.com/ClickHouse/ClickHouse/pull/43236) ([chen](https://github.com/xiedeyantu)).
+* Fix support for the question mark wildcard in S3 paths. Closes [#42731](https://github.com/ClickHouse/ClickHouse/issues/42731). [#43253](https://github.com/ClickHouse/ClickHouse/pull/43253) ([chen](https://github.com/xiedeyantu)).
+* Fix functions `arrayFirstOrNull` and `arrayLastOrNull` when the array is `Nullable`. [#43274](https://github.com/ClickHouse/ClickHouse/pull/43274) ([Duc Canh Le](https://github.com/canhld94)).
+* A new ZooKeeper path called "async_blocks" was introduced for replicated tables in [#43304](https://github.com/ClickHouse/ClickHouse/issues/43304). However, for tables created in older versions this path does not exist and caused errors when doing partition operations. This PR creates this node when initializing the replicated tree. It also adds a flag `async_insert_deduplicate` with a `false` default value to control whether to use this feature. As mentioned in [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075), this feature is not yet fully finished, so it is turned off by default. [#44223](https://github.com/ClickHouse/ClickHouse/pull/44223) ([Han Fei](https://github.com/hanfei1991)).
+
+#### Build/Testing/Packaging Improvement
+* Add support for FreeBSD/powerpc64le. [#40422](https://github.com/ClickHouse/ClickHouse/pull/40422) ([pkubaj](https://github.com/pkubaj)).
+* Bump Testcontainers for Go to v0.15.0. [#43278](https://github.com/ClickHouse/ClickHouse/pull/43278) ([Manuel de la Peña](https://github.com/mdelapenya)).
+* Enable base64 on s390x. [#43352](https://github.com/ClickHouse/ClickHouse/pull/43352) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
+* Shutdown is much faster if `clearOldPartsFromFilesystem` is not called. This is especially relevant for tests with zero-copy replication, since parts are deleted in a single thread. `clearOldPartsFromFilesystem` is unnecessary after https://github.com/ClickHouse/ClickHouse/pull/41145.
[#43760](https://github.com/ClickHouse/ClickHouse/pull/43760) ([Sema Checherinda](https://github.com/CheSema)). +* Integrate skim into the client/local. [#43922](https://github.com/ClickHouse/ClickHouse/pull/43922) ([Azat Khuzhin](https://github.com/azat)). +* Allow clickhouse to use openssl as a dynamic library and in-tree for development purposes. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)). +* Closes [#43912](https://github.com/ClickHouse/ClickHouse/issues/43912). [#43992](https://github.com/ClickHouse/ClickHouse/pull/43992) ([Nikolay Degterinsky](https://github.com/evillique)). +* Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Kill stress tests after 2.5h in case of hanging process. [#44214](https://github.com/ClickHouse/ClickHouse/pull/44214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Fixed unable to log in (because of failure to create session_log entry) in rare case of messed up setting profiles. ... [#42641](https://github.com/ClickHouse/ClickHouse/pull/42641) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix incorrect UserTimeMicroseconds/SystemTimeMicroseconds accounting. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)). +* Do not suppress exceptions in web disk. Fix retries for web disk. [#42800](https://github.com/ClickHouse/ClickHouse/pull/42800) ([Azat Khuzhin](https://github.com/azat)). +* Fixed race condition between inserts and dropping MVs. [#43161](https://github.com/ClickHouse/ClickHouse/pull/43161) ([AlfVII](https://github.com/AlfVII)). +* Fixed bug which could lead to deadlock while using asynchronous inserts. [#43233](https://github.com/ClickHouse/ClickHouse/pull/43233) ([Anton Popov](https://github.com/CurtizJ)). +* Additional check on zero uncompressed size is added to `CompressionCodecDelta`. [#43255](https://github.com/ClickHouse/ClickHouse/pull/43255) ([Nikita Taranov](https://github.com/nickitat)). +* An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix bad cast from LowCardinality column when using short circuit function execution. Proper fix of https://github.com/ClickHouse/ClickHouse/pull/42937. [#43311](https://github.com/ClickHouse/ClickHouse/pull/43311) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `DESCRIBE` for `deltaLake` and `hudi` table functions. [#43323](https://github.com/ClickHouse/ClickHouse/pull/43323) ([Antonio Andelic](https://github.com/antonio2368)). +* Check and compare the content of `format_version` file in `MergeTreeData` so tables can be loaded even if the storage policy was changed. [#43328](https://github.com/ClickHouse/ClickHouse/pull/43328) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix possible (very unlikely) "No column to rollback" logical error during INSERT into Buffer. [#43336](https://github.com/ClickHouse/ClickHouse/pull/43336) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix a bug that allowed FunctionParser to parse an unlimited number of round brackets into one function if `allow_function_parameters` is set. [#43350](https://github.com/ClickHouse/ClickHouse/pull/43350) ([Nikolay Degterinsky](https://github.com/evillique)).
+* MaterializeMySQL now supports the DDL `drop table t1, t2` and is compatible with most MySQL DROP DDL statements. [#43366](https://github.com/ClickHouse/ClickHouse/pull/43366) ([zzsmdfj](https://github.com/zzsmdfj)).
+* Fix possible `Cannot create non-empty column with type Nothing` in functions if/multiIf. Closes [#43356](https://github.com/ClickHouse/ClickHouse/issues/43356). [#43368](https://github.com/ClickHouse/ClickHouse/pull/43368) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix a bug when a row-level filter uses the default value of a column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)).
+* A query with DISTINCT + LIMIT BY + LIMIT could return fewer rows than expected. Fixes [#43377](https://github.com/ClickHouse/ClickHouse/issues/43377). [#43410](https://github.com/ClickHouse/ClickHouse/pull/43410) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix `sumMap()` for `Nullable(Decimal())`. [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `date_diff()` for hour/minute on macOS. Close [#42742](https://github.com/ClickHouse/ClickHouse/issues/42742). [#43466](https://github.com/ClickHouse/ClickHouse/pull/43466) ([zzsmdfj](https://github.com/zzsmdfj)).
+* Fix incorrect memory accounting because of merges/mutations. [#43516](https://github.com/ClickHouse/ClickHouse/pull/43516) ([Azat Khuzhin](https://github.com/azat)).
+* Substitute UDFs in the `CREATE` query to avoid failures during loading at startup. Additionally, UDFs can now be used as `DEFAULT` expressions for columns. [#43539](https://github.com/ClickHouse/ClickHouse/pull/43539) ([Antonio Andelic](https://github.com/antonio2368)).
+* Correctly report errors in queries even when the multiple-JOINs optimization is taking place. [#43583](https://github.com/ClickHouse/ClickHouse/pull/43583) ([Salvatore](https://github.com/tbsal)).
+* Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)).
+* Ensure consistency when the copier updates status and `attach_is_done` in Keeper after a partition attach is done. [#43602](https://github.com/ClickHouse/ClickHouse/pull/43602) ([lizhuoyu5](https://github.com/lzydmxy)).
+* During recovery of a lost replica there could be a situation where two table names need to be swapped atomically (using EXCHANGE), but previously two RENAME queries were used instead. This obviously failed and, moreover, failed the whole recovery process of the database replica. [#43628](https://github.com/ClickHouse/ClickHouse/pull/43628) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix the `s3Cluster` function returning a NOT_FOUND_COLUMN_IN_BLOCK error. Closes [#43534](https://github.com/ClickHouse/ClickHouse/issues/43534). [#43629](https://github.com/ClickHouse/ClickHouse/pull/43629) ([chen](https://github.com/xiedeyantu)).
+* Optimized the number of List requests to ZooKeeper when selecting a part to merge. Previously it could produce thousands of requests in some cases. Fixes [#43647](https://github.com/ClickHouse/ClickHouse/issues/43647).
[#43675](https://github.com/ClickHouse/ClickHouse/pull/43675) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix possible logical error 'Array sizes mismatched' while parsing a JSON object with arrays that have the same key names but different nesting levels. Closes [#43569](https://github.com/ClickHouse/ClickHouse/issues/43569). [#43693](https://github.com/ClickHouse/ClickHouse/pull/43693) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fixed a possible exception in case of distributed GROUP BY with an alias column among the aggregation keys. [#43709](https://github.com/ClickHouse/ClickHouse/pull/43709) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix a bug which can lead to broken projections if zero-copy replication is enabled and used. [#43764](https://github.com/ClickHouse/ClickHouse/pull/43764) ([alesapin](https://github.com/alesapin)).
+* Fix using multipart upload for large S3 objects in AWS S3. [#43824](https://github.com/ClickHouse/ClickHouse/pull/43824) ([ianton-ru](https://github.com/ianton-ru)).
+* Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)).
+* Fix logical error in right storage join with USING. [#43963](https://github.com/ClickHouse/ClickHouse/pull/43963) ([Vladimir C](https://github.com/vdimir)).
+* Keeper fix: throw if the interserver port for Raft is already in use. Fix a segfault in Prometheus when the Raft server failed to initialize. [#43984](https://github.com/ClickHouse/ClickHouse/pull/43984) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix ORDER BY with a positional argument in case of unneeded column pruning. Closes [#43964](https://github.com/ClickHouse/ClickHouse/issues/43964). [#43987](https://github.com/ClickHouse/ClickHouse/pull/43987) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix a bug with wrong order of keys in Storage Join. [#44012](https://github.com/ClickHouse/ClickHouse/pull/44012) ([Vladimir C](https://github.com/vdimir)).
+* Fixed an exception when a subquery contains HAVING but doesn't contain actual aggregation. [#44051](https://github.com/ClickHouse/ClickHouse/pull/44051) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix a race in S3 multipart upload. This race could cause the error `Part number must be an integer between 1 and 10000, inclusive. (S3_ERROR)` while restoring from a backup. [#44065](https://github.com/ClickHouse/ClickHouse/pull/44065) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Prevent dropping a nested column if it creates an empty part. [#44159](https://github.com/ClickHouse/ClickHouse/pull/44159) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix `LOGICAL_ERROR` when a fetch of a part was stopped while fetching a projection to disk with zero-copy replication enabled. [#44173](https://github.com/ClickHouse/ClickHouse/pull/44173) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix possible `Bad cast from type DB::IAST const* to DB::ASTLiteral const*`. Closes [#44191](https://github.com/ClickHouse/ClickHouse/issues/44191).
[#44192](https://github.com/ClickHouse/ClickHouse/pull/44192) ([Kruglov Pavel](https://github.com/Avogar)). +* Prevent `ReadonlyReplica` metric from having negative values. [#44220](https://github.com/ClickHouse/ClickHouse/pull/44220) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Build Improvement + +* Fixed Endian issues in hex string conversion on s390x (which is not supported by ClickHouse). [#41245](https://github.com/ClickHouse/ClickHouse/pull/41245) ([Harry Lee](https://github.com/HarryLeeIBM)). +* ... toDateTime64 conversion generates wrong time on z build, add bit_cast swap fix to support toDateTime64 on s390x platform. [#42847](https://github.com/ClickHouse/ClickHouse/pull/42847) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* ... s390x support for ip coding functions. [#43078](https://github.com/ClickHouse/ClickHouse/pull/43078) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Fix byte order issue of wide integers for s390x. [#43228](https://github.com/ClickHouse/ClickHouse/pull/43228) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Fixed endian issue in bloom filter serialization for s390x. [#43642](https://github.com/ClickHouse/ClickHouse/pull/43642) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Fixed setting TCP_KEEPIDLE of client connection for s390x. [#43850](https://github.com/ClickHouse/ClickHouse/pull/43850) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Fix endian issue in StringHashTable for s390x. [#44049](https://github.com/ClickHouse/ClickHouse/pull/44049) ([Harry Lee](https://github.com/HarryLeeIBM)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Revert "S3 request per second rate throttling""'. [#43335](https://github.com/ClickHouse/ClickHouse/pull/43335) ([Sergei Trifonov](https://github.com/serxa)). +* NO CL ENTRY: 'Update version after release'. [#43348](https://github.com/ClickHouse/ClickHouse/pull/43348) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* NO CL ENTRY: 'Revert "Add table_uuid to system.parts"'. [#43571](https://github.com/ClickHouse/ClickHouse/pull/43571) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Fix endian issue in integer hex string conversion"'. [#43613](https://github.com/ClickHouse/ClickHouse/pull/43613) ([Vladimir C](https://github.com/vdimir)). +* NO CL ENTRY: 'Update replication.md'. [#43643](https://github.com/ClickHouse/ClickHouse/pull/43643) ([Peignon Melvyn](https://github.com/melvynator)). +* NO CL ENTRY: 'Revert "Temporary files evict fs cache"'. [#43883](https://github.com/ClickHouse/ClickHouse/pull/43883) ([Vladimir C](https://github.com/vdimir)). +* NO CL ENTRY: 'Update html interface doc'. [#44064](https://github.com/ClickHouse/ClickHouse/pull/44064) ([San](https://github.com/santrancisco)). +* NO CL ENTRY: 'Revert "Add function 'age'"'. [#44203](https://github.com/ClickHouse/ClickHouse/pull/44203) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Builtin skim"'. [#44227](https://github.com/ClickHouse/ClickHouse/pull/44227) ([Azat Khuzhin](https://github.com/azat)). +* NO CL ENTRY: 'Revert "Add information about written rows in progress indicator"'. [#44255](https://github.com/ClickHouse/ClickHouse/pull/44255) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Build libcxx and libcxxabi from llvm-project [#42730](https://github.com/ClickHouse/ClickHouse/pull/42730) ([Robert Schulze](https://github.com/rschu1ze)). 
+* Allow release only from ready commits [#43019](https://github.com/ClickHouse/ClickHouse/pull/43019) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add global flags to base/ libraries [#43082](https://github.com/ClickHouse/ClickHouse/pull/43082) ([Raúl Marín](https://github.com/Algunenano)). +* Enable strict typing check in tests/ci [#43132](https://github.com/ClickHouse/ClickHouse/pull/43132) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add server UUID for disks access checks (read/read-by-offset/write/delete) to avoid possible races [#43143](https://github.com/ClickHouse/ClickHouse/pull/43143) ([Azat Khuzhin](https://github.com/azat)). +* Do not include libcxx library for C [#43166](https://github.com/ClickHouse/ClickHouse/pull/43166) ([Azat Khuzhin](https://github.com/azat)). +* Followup fixes for FuseFunctionsPass [#43217](https://github.com/ClickHouse/ClickHouse/pull/43217) ([Vladimir C](https://github.com/vdimir)). +* Fix bug in replication queue which can lead to premature mutation finish [#43231](https://github.com/ClickHouse/ClickHouse/pull/43231) ([alesapin](https://github.com/alesapin)). +* Support `CREATE / ALTER / DROP NAMED COLLECTION` queries under according access types [#43252](https://github.com/ClickHouse/ClickHouse/pull/43252) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix race in `IColumn::dumpStructure` [#43269](https://github.com/ClickHouse/ClickHouse/pull/43269) ([Anton Popov](https://github.com/CurtizJ)). +* Sanitize thirdparty libraries for public flags [#43275](https://github.com/ClickHouse/ClickHouse/pull/43275) ([Azat Khuzhin](https://github.com/azat)). +* stress: increase timeout for server waiting after TERM [#43277](https://github.com/ClickHouse/ClickHouse/pull/43277) ([Azat Khuzhin](https://github.com/azat)). +* Fix cloning of ASTIdentifier [#43282](https://github.com/ClickHouse/ClickHouse/pull/43282) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix race on write in `ReplicatedMergeTree` [#43289](https://github.com/ClickHouse/ClickHouse/pull/43289) ([Antonio Andelic](https://github.com/antonio2368)). +* Cancel lambda api url [#43295](https://github.com/ClickHouse/ClickHouse/pull/43295) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fixed: Typo [#43312](https://github.com/ClickHouse/ClickHouse/pull/43312) ([Raevsky Rudolf](https://github.com/lanesket)). +* Analyzer small fixes [#43321](https://github.com/ClickHouse/ClickHouse/pull/43321) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix: make test_read_only_table more stable [#43326](https://github.com/ClickHouse/ClickHouse/pull/43326) ([Igor Nikonov](https://github.com/devcrafter)). +* Make insertRangeFrom() more exception safe [#43338](https://github.com/ClickHouse/ClickHouse/pull/43338) ([Azat Khuzhin](https://github.com/azat)). +* Analyzer added indexes support [#43341](https://github.com/ClickHouse/ClickHouse/pull/43341) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow to "drop tables" from s3_plain disk (so as from web disk) [#43343](https://github.com/ClickHouse/ClickHouse/pull/43343) ([Azat Khuzhin](https://github.com/azat)). +* Add --max-consecutive-errors for clickhouse-benchmark [#43344](https://github.com/ClickHouse/ClickHouse/pull/43344) ([Azat Khuzhin](https://github.com/azat)). +* Add [#43072](https://github.com/ClickHouse/ClickHouse/issues/43072) [#43345](https://github.com/ClickHouse/ClickHouse/pull/43345) ([Nikita Taranov](https://github.com/nickitat)). 
+* Suggest users installation troubleshooting [#43346](https://github.com/ClickHouse/ClickHouse/pull/43346) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update version_date.tsv and changelogs after v22.11.1.1360-stable [#43349](https://github.com/ClickHouse/ClickHouse/pull/43349) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Provide full stacktrace in case of uncaught exception during server startup [#43364](https://github.com/ClickHouse/ClickHouse/pull/43364) ([Azat Khuzhin](https://github.com/azat)). +* Update SECURITY.md on new stable tags [#43365](https://github.com/ClickHouse/ClickHouse/pull/43365) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Splitting checks in CI more [#43373](https://github.com/ClickHouse/ClickHouse/pull/43373) ([alesapin](https://github.com/alesapin)). +* Update version_date.tsv and changelogs after v22.8.9.24-lts [#43393](https://github.com/ClickHouse/ClickHouse/pull/43393) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix mess with signed sizes in SingleValueDataString [#43401](https://github.com/ClickHouse/ClickHouse/pull/43401) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add a comment [#43403](https://github.com/ClickHouse/ClickHouse/pull/43403) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid race condition for updating system.distribution_queue values [#43406](https://github.com/ClickHouse/ClickHouse/pull/43406) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky 01926_order_by_desc_limit [#43408](https://github.com/ClickHouse/ClickHouse/pull/43408) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible heap-use-after-free in local if history file cannot be created [#43409](https://github.com/ClickHouse/ClickHouse/pull/43409) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test [#43435](https://github.com/ClickHouse/ClickHouse/pull/43435) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix backward compatibility check [#43436](https://github.com/ClickHouse/ClickHouse/pull/43436) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix typo [#43446](https://github.com/ClickHouse/ClickHouse/pull/43446) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove noise from logs about NetLink in Docker [#43447](https://github.com/ClickHouse/ClickHouse/pull/43447) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Modify test slightly [#43448](https://github.com/ClickHouse/ClickHouse/pull/43448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Set run_passes to 1 by default [#43451](https://github.com/ClickHouse/ClickHouse/pull/43451) ([Dmitry Novik](https://github.com/novikd)). +* Do not reuse jemalloc memory in test_global_overcommit [#43453](https://github.com/ClickHouse/ClickHouse/pull/43453) ([Dmitry Novik](https://github.com/novikd)). +* Fix createTableSharedID again [#43458](https://github.com/ClickHouse/ClickHouse/pull/43458) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Use smaller buffer for small files [#43460](https://github.com/ClickHouse/ClickHouse/pull/43460) ([Alexander Gololobov](https://github.com/davenger)). +* Merging [#42064](https://github.com/ClickHouse/ClickHouse/issues/42064) [#43461](https://github.com/ClickHouse/ClickHouse/pull/43461) ([Anton Popov](https://github.com/CurtizJ)). +* Use all parameters with prefixes from ssm [#43467](https://github.com/ClickHouse/ClickHouse/pull/43467) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Avoid possible DROP hung due to attached web disk [#43489](https://github.com/ClickHouse/ClickHouse/pull/43489) ([Azat Khuzhin](https://github.com/azat)). +* Improve fuzzy search in clickhouse-client/clickhouse-local [#43498](https://github.com/ClickHouse/ClickHouse/pull/43498) ([Azat Khuzhin](https://github.com/azat)). +* check ast limits for create_parser_fuzzer [#43504](https://github.com/ClickHouse/ClickHouse/pull/43504) ([Sema Checherinda](https://github.com/CheSema)). +* Add another test for SingleDataValueString [#43514](https://github.com/ClickHouse/ClickHouse/pull/43514) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Move password reset message from client to server [#43517](https://github.com/ClickHouse/ClickHouse/pull/43517) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Sync everything to persistent storage to avoid writeback affects perf tests [#43530](https://github.com/ClickHouse/ClickHouse/pull/43530) ([Azat Khuzhin](https://github.com/azat)). +* bump lib for diag [#43538](https://github.com/ClickHouse/ClickHouse/pull/43538) ([Dale McDiarmid](https://github.com/gingerwizard)). +* Temporarily disable `test_hive_query` [#43542](https://github.com/ClickHouse/ClickHouse/pull/43542) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer SumIfToCountIfPass fix [#43543](https://github.com/ClickHouse/ClickHouse/pull/43543) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer UniqInjectiveFunctionsEliminationPass [#43547](https://github.com/ClickHouse/ClickHouse/pull/43547) ([Maksim Kita](https://github.com/kitaisreal)). +* Disable broken 00176_bson_parallel_parsing [#43550](https://github.com/ClickHouse/ClickHouse/pull/43550) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add benchmark for query interpretation with JOINs [#43556](https://github.com/ClickHouse/ClickHouse/pull/43556) ([Raúl Marín](https://github.com/Algunenano)). +* Analyzer table functions untuple fix [#43572](https://github.com/ClickHouse/ClickHouse/pull/43572) ([Maksim Kita](https://github.com/kitaisreal)). +* Prepare CI for universal runners preallocated pool [#43579](https://github.com/ClickHouse/ClickHouse/pull/43579) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Iterate list without index-based access [#43584](https://github.com/ClickHouse/ClickHouse/pull/43584) ([Alexander Gololobov](https://github.com/davenger)). +* Remove code that I do not understand [#43593](https://github.com/ClickHouse/ClickHouse/pull/43593) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add table_uuid to system.parts (resubmit) [#43595](https://github.com/ClickHouse/ClickHouse/pull/43595) ([Azat Khuzhin](https://github.com/azat)). +* Move perf tests for Aarch64 from PRs to master [#43623](https://github.com/ClickHouse/ClickHouse/pull/43623) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky 01175_distributed_ddl_output_mode_long [#43626](https://github.com/ClickHouse/ClickHouse/pull/43626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Speedup backup config loading [#43627](https://github.com/ClickHouse/ClickHouse/pull/43627) ([Alexander Gololobov](https://github.com/davenger)). +* Fix [#43478](https://github.com/ClickHouse/ClickHouse/issues/43478) [#43636](https://github.com/ClickHouse/ClickHouse/pull/43636) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Do not checkout submodules recursively [#43637](https://github.com/ClickHouse/ClickHouse/pull/43637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Optimize binary-builder size [#43654](https://github.com/ClickHouse/ClickHouse/pull/43654) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix flaky `KeeperMap` integration tests [#43658](https://github.com/ClickHouse/ClickHouse/pull/43658) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix data race in `Keeper` snapshot [#43663](https://github.com/ClickHouse/ClickHouse/pull/43663) ([Antonio Andelic](https://github.com/antonio2368)). +* Use docker images cache from merged PRs in master and release branches [#43664](https://github.com/ClickHouse/ClickHouse/pull/43664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update AsynchronousReadIndirectBufferFromRemoteFS.cpp [#43667](https://github.com/ClickHouse/ClickHouse/pull/43667) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix pagination issue in GITHUB_JOB_ID() [#43681](https://github.com/ClickHouse/ClickHouse/pull/43681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Try fix flaky test 00176_bson_parallel_parsing [#43696](https://github.com/ClickHouse/ClickHouse/pull/43696) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix log messages in clickhouse-copier [#43707](https://github.com/ClickHouse/ClickHouse/pull/43707) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* try to remove clickhouse if already exists [#43728](https://github.com/ClickHouse/ClickHouse/pull/43728) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix 43622 [#43731](https://github.com/ClickHouse/ClickHouse/pull/43731) ([Amos Bird](https://github.com/amosbird)). +* Fix example of colored prompt in client [#43738](https://github.com/ClickHouse/ClickHouse/pull/43738) ([Azat Khuzhin](https://github.com/azat)). +* Minor fixes in annoy index documentation [#43743](https://github.com/ClickHouse/ClickHouse/pull/43743) ([Robert Schulze](https://github.com/rschu1ze)). +* Terminate lost runners [#43756](https://github.com/ClickHouse/ClickHouse/pull/43756) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update README.md [#43759](https://github.com/ClickHouse/ClickHouse/pull/43759) ([Tyler Hannan](https://github.com/tylerhannan)). +* Fix included_elements calculation in AggregateFunctionNullVariadic [#43763](https://github.com/ClickHouse/ClickHouse/pull/43763) ([Dmitry Novik](https://github.com/novikd)). +* Migrate runner_token_rotation_lambda to zip-package deployment [#43766](https://github.com/ClickHouse/ClickHouse/pull/43766) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Analyzer compound expression crash fix [#43768](https://github.com/ClickHouse/ClickHouse/pull/43768) ([Maksim Kita](https://github.com/kitaisreal)). +* Migrate termination lambda to zip-package [#43769](https://github.com/ClickHouse/ClickHouse/pull/43769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix flaky `test_store_cleanup` [#43770](https://github.com/ClickHouse/ClickHouse/pull/43770) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Attempt to fix StyleCheck condition [#43773](https://github.com/ClickHouse/ClickHouse/pull/43773) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Rerun PullRequestCI on changed description body [#43777](https://github.com/ClickHouse/ClickHouse/pull/43777) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Yet another fix for AggregateFunctionMinMaxAny [#43778](https://github.com/ClickHouse/ClickHouse/pull/43778) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Add google benchmark to contrib [#43779](https://github.com/ClickHouse/ClickHouse/pull/43779) ([Nikita Taranov](https://github.com/nickitat)). +* Fix EN doc as in [#43765](https://github.com/ClickHouse/ClickHouse/issues/43765) [#43780](https://github.com/ClickHouse/ClickHouse/pull/43780) ([Alexander Gololobov](https://github.com/davenger)). +* Detach threads from thread group [#43781](https://github.com/ClickHouse/ClickHouse/pull/43781) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Try making `test_keeper_zookeeper_converter` less flaky [#43789](https://github.com/ClickHouse/ClickHouse/pull/43789) ([Antonio Andelic](https://github.com/antonio2368)). +* Polish UDF substitution visitor [#43790](https://github.com/ClickHouse/ClickHouse/pull/43790) ([Antonio Andelic](https://github.com/antonio2368)). +* Analyzer ConstantNode refactoring [#43793](https://github.com/ClickHouse/ClickHouse/pull/43793) ([Maksim Kita](https://github.com/kitaisreal)). +* Update Poco [#43802](https://github.com/ClickHouse/ClickHouse/pull/43802) ([Alexander Gololobov](https://github.com/davenger)). +* Add another BC check suppression [#43810](https://github.com/ClickHouse/ClickHouse/pull/43810) ([Alexander Tokmakov](https://github.com/tavplubix)). +* tests: fix 01676_long_clickhouse_client_autocomplete flakiness [#43819](https://github.com/ClickHouse/ClickHouse/pull/43819) ([Azat Khuzhin](https://github.com/azat)). +* Use disk operation to serialize and deserialize meta files of StorageFilelog [#43826](https://github.com/ClickHouse/ClickHouse/pull/43826) ([flynn](https://github.com/ucasfl)). +* Add constexpr [#43827](https://github.com/ClickHouse/ClickHouse/pull/43827) ([zhanglistar](https://github.com/zhanglistar)). +* Do not postpone removal of in-memory tables [#43833](https://github.com/ClickHouse/ClickHouse/pull/43833) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Increase some logging level for keeper client. [#43835](https://github.com/ClickHouse/ClickHouse/pull/43835) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* FuseFunctionsPass small fix [#43837](https://github.com/ClickHouse/ClickHouse/pull/43837) ([Maksim Kita](https://github.com/kitaisreal)). +* Followup fixes for XML helpers [#43845](https://github.com/ClickHouse/ClickHouse/pull/43845) ([Alexander Gololobov](https://github.com/davenger)). +* Hold ProcessListEntry a bit longer in case of exception from Interpreter [#43847](https://github.com/ClickHouse/ClickHouse/pull/43847) ([Alexander Tokmakov](https://github.com/tavplubix)). +* A little improve performance of PODArray [#43860](https://github.com/ClickHouse/ClickHouse/pull/43860) ([zhanglistar](https://github.com/zhanglistar)). +* Change email for robot-clickhouse to immutable one [#43861](https://github.com/ClickHouse/ClickHouse/pull/43861) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Rerun DocsCheck on edited PR description [#43862](https://github.com/ClickHouse/ClickHouse/pull/43862) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Temporarily disable misc-* slow clang-tidy checks [#43863](https://github.com/ClickHouse/ClickHouse/pull/43863) ([Robert Schulze](https://github.com/rschu1ze)). +* do not leave tmp part on disk, do not go to the keeper for remove it [#43866](https://github.com/ClickHouse/ClickHouse/pull/43866) ([Sema Checherinda](https://github.com/CheSema)). +* do not read part status just for logging [#43868](https://github.com/ClickHouse/ClickHouse/pull/43868) ([Sema Checherinda](https://github.com/CheSema)). 
+* Analyzer Context refactoring [#43884](https://github.com/ClickHouse/ClickHouse/pull/43884) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer CTE resolution fix [#43893](https://github.com/ClickHouse/ClickHouse/pull/43893) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve release script [#43894](https://github.com/ClickHouse/ClickHouse/pull/43894) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use only PRs to our repository in pr_info on push [#43895](https://github.com/ClickHouse/ClickHouse/pull/43895) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Join engine works with analyzer [#43897](https://github.com/ClickHouse/ClickHouse/pull/43897) ([Vladimir C](https://github.com/vdimir)). +* Fix reports [#43904](https://github.com/ClickHouse/ClickHouse/pull/43904) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix vim settings (and make it compatible with neovim) [#43909](https://github.com/ClickHouse/ClickHouse/pull/43909) ([Azat Khuzhin](https://github.com/azat)). +* Fix clang tidy errors introduced in [#43834](https://github.com/ClickHouse/ClickHouse/issues/43834) [#43911](https://github.com/ClickHouse/ClickHouse/pull/43911) ([Nikita Taranov](https://github.com/nickitat)). +* Fix BACKUP TO S3 for Google Cloud Storage [#43940](https://github.com/ClickHouse/ClickHouse/pull/43940) ([Azat Khuzhin](https://github.com/azat)). +* Fix tags workflow [#43942](https://github.com/ClickHouse/ClickHouse/pull/43942) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Generate missed changelogs for latest releases [#43944](https://github.com/ClickHouse/ClickHouse/pull/43944) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix typo in tests/ci/bugfix_validate_check.py [#43973](https://github.com/ClickHouse/ClickHouse/pull/43973) ([Vladimir C](https://github.com/vdimir)). +* Remove test logging of signal "EINTR" [#44001](https://github.com/ClickHouse/ClickHouse/pull/44001) ([Kruglov Pavel](https://github.com/Avogar)). +* Some cleanup of isDeterministic(InScopeOfQuery)() [#44011](https://github.com/ClickHouse/ClickHouse/pull/44011) ([Robert Schulze](https://github.com/rschu1ze)). +* Try to keep runners alive for longer [#44015](https://github.com/ClickHouse/ClickHouse/pull/44015) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix relaxed "too many parts" threshold [#44021](https://github.com/ClickHouse/ClickHouse/pull/44021) ([Sergei Trifonov](https://github.com/serxa)). +* Correct CompressionCodecGorilla exception message [#44023](https://github.com/ClickHouse/ClickHouse/pull/44023) ([Duc Canh Le](https://github.com/canhld94)). +* Fix exception message [#44034](https://github.com/ClickHouse/ClickHouse/pull/44034) ([Nikolay Degterinsky](https://github.com/evillique)). +* Update version_date.tsv and changelogs after v22.8.11.15-lts [#44035](https://github.com/ClickHouse/ClickHouse/pull/44035) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* do not hardlink serialization.json in new part [#44036](https://github.com/ClickHouse/ClickHouse/pull/44036) ([Sema Checherinda](https://github.com/CheSema)). +* Fix tracing of profile events [#44045](https://github.com/ClickHouse/ClickHouse/pull/44045) ([Anton Popov](https://github.com/CurtizJ)). +* Slightly better clickhouse disks and remove DiskMemory [#44050](https://github.com/ClickHouse/ClickHouse/pull/44050) ([alesapin](https://github.com/alesapin)). +* Assign release PRs [#44055](https://github.com/ClickHouse/ClickHouse/pull/44055) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* Merging [#36877](https://github.com/ClickHouse/ClickHouse/issues/36877) [#44059](https://github.com/ClickHouse/ClickHouse/pull/44059) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* add changelogs [#44061](https://github.com/ClickHouse/ClickHouse/pull/44061) ([Dan Roscigno](https://github.com/DanRoscigno)). +* Fix the CACHE_PATH creation for default value [#44079](https://github.com/ClickHouse/ClickHouse/pull/44079) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix aspell [#44090](https://github.com/ClickHouse/ClickHouse/pull/44090) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix another bug in AggregateFunctionMinMaxAny [#44091](https://github.com/ClickHouse/ClickHouse/pull/44091) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer aggregate function lambda crash fix [#44098](https://github.com/ClickHouse/ClickHouse/pull/44098) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix -Wshorten-64-to-32 on FreeBSD and enable -Werror [#44121](https://github.com/ClickHouse/ClickHouse/pull/44121) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test `02497_trace_events_stress_long` [#44124](https://github.com/ClickHouse/ClickHouse/pull/44124) ([Anton Popov](https://github.com/CurtizJ)). +* Minor file renaming [#44125](https://github.com/ClickHouse/ClickHouse/pull/44125) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix typo [#44127](https://github.com/ClickHouse/ClickHouse/pull/44127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better descriptions of signals [#44129](https://github.com/ClickHouse/ClickHouse/pull/44129) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* make calls to be sure that parts are deleted [#44156](https://github.com/ClickHouse/ClickHouse/pull/44156) ([Sema Checherinda](https://github.com/CheSema)). +* Ignore "session expired" errors after BC check [#44157](https://github.com/ClickHouse/ClickHouse/pull/44157) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix incorrect assertion [#44160](https://github.com/ClickHouse/ClickHouse/pull/44160) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Close GRPC channels in tests [#44184](https://github.com/ClickHouse/ClickHouse/pull/44184) ([Antonio Andelic](https://github.com/antonio2368)). +* Remove misleading message from logs [#44190](https://github.com/ClickHouse/ClickHouse/pull/44190) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Minor clang-tidy fixes in fromUnixTimestamp64() [#44194](https://github.com/ClickHouse/ClickHouse/pull/44194) ([Igor Nikonov](https://github.com/devcrafter)). +* Hotfix for "check_status.tsv doesn't exists" in stress tests [#44197](https://github.com/ClickHouse/ClickHouse/pull/44197) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix documentation after [#42438](https://github.com/ClickHouse/ClickHouse/issues/42438) [#44200](https://github.com/ClickHouse/ClickHouse/pull/44200) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix an assertion in transactions [#44202](https://github.com/ClickHouse/ClickHouse/pull/44202) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add log message [#44237](https://github.com/ClickHouse/ClickHouse/pull/44237) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+ diff --git a/docs/changelogs/v22.9.7.34-stable.md b/docs/changelogs/v22.9.7.34-stable.md new file mode 100644 index 00000000000..042347b3815 --- /dev/null +++ b/docs/changelogs/v22.9.7.34-stable.md @@ -0,0 +1,37 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.9.7.34-stable (613fe09ca2e) FIXME as compared to v22.9.6.20-stable (ef6343f9579) + +#### Bug Fix +* Backported in [#43099](https://github.com/ClickHouse/ClickHouse/issues/43099): Updated normaliser to clone the alias ast. resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). + +#### Build/Testing/Packaging Improvement +* Backported in [#44111](https://github.com/ClickHouse/ClickHouse/issues/44111): Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#43612](https://github.com/ClickHouse/ClickHouse/issues/43612): Fix bad inefficiency of `remote_filesystem_read_method=read` with filesystem cache. Closes [#42125](https://github.com/ClickHouse/ClickHouse/issues/42125). [#42129](https://github.com/ClickHouse/ClickHouse/pull/42129) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#43526](https://github.com/ClickHouse/ClickHouse/issues/43526): Fix incorrect UserTimeMicroseconds/SystemTimeMicroseconds accounting. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#43518](https://github.com/ClickHouse/ClickHouse/issues/43518): Fix rare possible hung on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#43752](https://github.com/ClickHouse/ClickHouse/issues/43752): An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#43618](https://github.com/ClickHouse/ClickHouse/issues/43618): Fix sumMap() for Nullable(Decimal()). [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#43887](https://github.com/ClickHouse/ClickHouse/issues/43887): Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)). +* Backported in [#44145](https://github.com/ClickHouse/ClickHouse/issues/44145): Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). 
[#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Fix multipart upload for large S3 object, backport to 22.9'. [#44219](https://github.com/ClickHouse/ClickHouse/pull/44219) ([ianton-ru](https://github.com/ianton-ru)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Yet another fix for AggregateFunctionMinMaxAny [#43778](https://github.com/ClickHouse/ClickHouse/pull/43778) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Use only PRs to our repository in pr_info on push [#43895](https://github.com/ClickHouse/ClickHouse/pull/43895) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix tags workflow [#43942](https://github.com/ClickHouse/ClickHouse/pull/43942) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Assign release PRs [#44055](https://github.com/ClickHouse/ClickHouse/pull/44055) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix another bug in AggregateFunctionMinMaxAny [#44091](https://github.com/ClickHouse/ClickHouse/pull/44091) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/en/operations/settings/constraints-on-settings.md b/docs/en/operations/settings/constraints-on-settings.md index bb015f80834..4857bcd30c0 100644 --- a/docs/en/operations/settings/constraints-on-settings.md +++ b/docs/en/operations/settings/constraints-on-settings.md @@ -92,7 +92,7 @@ Code: 452, e.displayText() = DB::Exception: Setting force_index_by_date should n **Note:** the `default` profile has special handling: all the constraints defined for the `default` profile become the default constraints, so they restrict all the users until they’re overridden explicitly for these users. ## Constraints on Merge Tree Settings -It is possible to set constraints for [merge tree settings](merge-tree-settings.md). There constraints are applied when table with merge tree engine is created or its storage settings are altered. Name of merge tree setting must be prepended by `merge_tree_` prefix when referenced in `` section. +It is possible to set constraints for [merge tree settings](merge-tree-settings.md). These constraints are applied when table with merge tree engine is created or its storage settings are altered. Name of merge tree setting must be prepended by `merge_tree_` prefix when referenced in `` section. **Example:** Forbid to create new tables with explicitly specified `storage_policy` diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index ddfaab02159..a4b7495f4c9 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3588,6 +3588,24 @@ y Nullable(String) z IPv4 ``` +## input_format_try_infer_integers {#input_format_try_infer_integers} + +If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from input data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`. + +Enabled by default. + +## input_format_try_infer_dates {#input_format_try_infer_dates} + +If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as dates, the result type will be `Date`, if at least one field was not parsed as date, the result type will be `String`. + +Enabled by default. 
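For illustration, here is a minimal sketch of how the integer and date inference settings show up in practice (the `format` table function and `DESC` query are standard ClickHouse; the sample data and column names below are invented, and the exact output may differ between versions):

```sql
-- With input_format_try_infer_integers and input_format_try_infer_dates enabled (the defaults),
-- whole numbers are inferred as Int64 and date-like strings as Date:
DESC format(JSONEachRow, '{"id" : 42, "day" : "2022-12-15"}');
-- id    Nullable(Int64)
-- day   Nullable(Date)

-- Turning integer inference off makes the numeric column fall back to Float64:
SET input_format_try_infer_integers = 0;
DESC format(JSONEachRow, '{"id" : 42, "day" : "2022-12-15"}');
-- id    Nullable(Float64)
-- day   Nullable(Date)
```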
+ +## input_format_try_infer_datetimes {#input_format_try_infer_datetimes} + +If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as datetimes, the result type will be `DateTime64`, if at least one field was not parsed as datetime, the result type will be `String`. + +Enabled by default. + ## date_time_input_format {#date_time_input_format} Allows choosing a parser of the text representation of date and time. diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index aac0db208c6..bf88b9cedf2 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -607,3 +607,7 @@ dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1'))) Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned. Data must completely fit into RAM. + +## Related Content + +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index e4edad4d9a1..8e9dbd392aa 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -136,3 +136,7 @@ or SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15)) ... ``` + +## Related Content + +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index 4eb96fe80a2..c5d48945649 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -824,3 +824,7 @@ Setting fields: :::note The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. ::: + +## Related Content + +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) \ No newline at end of file diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 881630167e3..8271a342941 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -176,3 +176,6 @@ Configuration fields: - [Functions for working with dictionaries](../../../sql-reference/functions/ext-dict-functions.md). 
+## Related Content + +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) \ No newline at end of file diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md index 76ca3ac978f..a923511ca5e 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md @@ -52,3 +52,6 @@ LIFETIME(...) -- Lifetime of dictionary in memory - [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md) — Structure of the dictionary . A key and attributes that can be retrieved by this key. - [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) — Frequency of dictionary updates. +## Related Content + +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) \ No newline at end of file diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 9f922a2cccb..8621c68b428 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -79,3 +79,6 @@ You can convert values for a small dictionary by describing it in a `SELECT` que - [Dictionary Key and Fields](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md) - [Functions for Working with Dictionaries](../../../sql-reference/functions/ext-dict-functions.md) +## Related Content + +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index 840bcd583e4..22e79ec6623 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -296,3 +296,7 @@ Another example is the `hostName` function, which returns the name of the server If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in an ‘any’ aggregate function or add it to a key in `GROUP BY`. 
+ +## Related Content + +- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs) diff --git a/docs/en/sql-reference/statements/create/function.md b/docs/en/sql-reference/statements/create/function.md index 90be007bf43..80d20e8ccad 100644 --- a/docs/en/sql-reference/statements/create/function.md +++ b/docs/en/sql-reference/statements/create/function.md @@ -58,3 +58,7 @@ Result: │ 2 │ even │ └────────┴──────────────────────────────────────┘ ``` + +## Related Content + +- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 33d11091660..1614fb1a8b4 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 574e9bfa62e..db17a6a7bee 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index 9983adcb417..dc45cd5789a 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -86,6 +86,49 @@ void SettingsConstraints::merge(const SettingsConstraints & other) } +void SettingsConstraints::check(const Settings & current_settings, const SettingsProfileElements & profile_elements) const +{ + for (const auto & element : profile_elements) + { + if (SettingsProfileElements::isAllowBackupSetting(element.setting_name)) + continue; + + if (!element.value.isNull()) + { + SettingChange value(element.setting_name, element.value); + check(current_settings, value); + } + + if (!element.min_value.isNull()) + { + SettingChange value(element.setting_name, element.min_value); + check(current_settings, value); + } + + if (!element.max_value.isNull()) + { + SettingChange value(element.setting_name, element.max_value); + check(current_settings, value); + } + + SettingConstraintWritability new_value = SettingConstraintWritability::WRITABLE; + SettingConstraintWritability old_value = SettingConstraintWritability::WRITABLE; + + if (element.writability) + new_value = *element.writability; + + auto it = constraints.find(element.setting_name); + if (it != constraints.end()) + old_value = it->second.writability; + + if (new_value != old_value) + { + if (old_value == SettingConstraintWritability::CONST) + throw Exception("Setting " + element.setting_name + " should not be changed", ErrorCodes::SETTING_CONSTRAINT_VIOLATION); + } + } +} + void SettingsConstraints::check(const Settings & current_settings, const SettingChange & change) const { checkImpl(current_settings, const_cast(change), THROW_ON_VIOLATION); diff --git a/src/Access/SettingsConstraints.h b/src/Access/SettingsConstraints.h index ed671774281..2e9f12d0b9c 100644 --- a/src/Access/SettingsConstraints.h +++ b/src/Access/SettingsConstraints.h @@ -73,6 +73,7 @@ public: void merge(const SettingsConstraints & other); /// Checks whether `change` violates these constraints and throws an exception if so. 
+ void check(const Settings & current_settings, const SettingsProfileElements & profile_elements) const; void check(const Settings & current_settings, const SettingChange & change) const; void check(const Settings & current_settings, const SettingsChanges & changes) const; void check(const Settings & current_settings, SettingsChanges & changes) const; diff --git a/src/Access/SettingsProfileElement.cpp b/src/Access/SettingsProfileElement.cpp index 474ffec0d21..a4f3e81ab30 100644 --- a/src/Access/SettingsProfileElement.cpp +++ b/src/Access/SettingsProfileElement.cpp @@ -248,4 +248,9 @@ bool SettingsProfileElements::isBackupAllowed() const return true; } +bool SettingsProfileElements::isAllowBackupSetting(const String & setting_name) +{ + return setting_name == ALLOW_BACKUP_SETTING_NAME; +} + } diff --git a/src/Access/SettingsProfileElement.h b/src/Access/SettingsProfileElement.h index c02e9947d61..c4dcf4d83a7 100644 --- a/src/Access/SettingsProfileElement.h +++ b/src/Access/SettingsProfileElement.h @@ -71,6 +71,8 @@ public: std::vector toProfileIDs() const; bool isBackupAllowed() const; + + static bool isAllowBackupSetting(const String & setting_name); }; } diff --git a/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp b/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp new file mode 100644 index 00000000000..5fc6b21926e --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp @@ -0,0 +1,647 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int FUNCTION_NOT_ALLOWED; + extern const int NOT_IMPLEMENTED; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +struct AggregateFunctionFlameGraphTree +{ + struct ListNode; + + struct TreeNode + { + TreeNode * parent = nullptr; + ListNode * children = nullptr; + UInt64 ptr = 0; + size_t allocated = 0; + }; + + struct ListNode + { + ListNode * next = nullptr; + TreeNode * child = nullptr; + }; + + TreeNode root; + + static ListNode * createChild(TreeNode * parent, UInt64 ptr, Arena * arena) + { + + ListNode * list_node = reinterpret_cast(arena->alloc(sizeof(ListNode))); + TreeNode * tree_node = reinterpret_cast(arena->alloc(sizeof(TreeNode))); + + list_node->child = tree_node; + list_node->next = nullptr; + + tree_node->parent =parent; + tree_node->children = nullptr; + tree_node->ptr = ptr; + tree_node->allocated = 0; + + return list_node; + } + + TreeNode * find(const UInt64 * stack, size_t stack_size, Arena * arena) + { + TreeNode * node = &root; + for (size_t i = 0; i < stack_size; ++i) + { + UInt64 ptr = stack[i]; + if (ptr == 0) + break; + + if (!node->children) + { + node->children = createChild(node, ptr, arena); + node = node->children->child; + } + else + { + ListNode * list = node->children; + while (list->child->ptr != ptr && list->next) + list = list->next; + + if (list->child->ptr != ptr) + { + list->next = createChild(node, ptr, arena); + list = list->next; + } + + node = list->child; + } + } + + return node; + } + + static void append(DB::PaddedPODArray & values, DB::PaddedPODArray & offsets, std::vector & frame) + { + UInt64 prev = offsets.empty() ? 
0 : offsets.back(); + offsets.push_back(prev + frame.size()); + for (UInt64 val : frame) + values.push_back(val); + } + + struct Trace + { + using Frames = std::vector; + + Frames frames; + + /// The total number of bytes allocated for traces with the same prefix. + size_t allocated_total = 0; + /// This counter is relevant in case we want to filter some traces with small amount of bytes. + /// It shows the total number of bytes for *filtered* traces with the same prefix. + /// This is the value which is used in flamegraph. + size_t allocated_self = 0; + }; + + using Traces = std::vector; + + Traces dump(size_t max_depth, size_t min_bytes) const + { + Traces traces; + Trace::Frames frames; + std::vector allocated_total; + std::vector allocated_self; + std::vector nodes; + + nodes.push_back(root.children); + allocated_total.push_back(root.allocated); + allocated_self.push_back(root.allocated); + + while (!nodes.empty()) + { + if (nodes.back() == nullptr) + { + traces.push_back({frames, allocated_total.back(), allocated_self.back()}); + + nodes.pop_back(); + allocated_total.pop_back(); + allocated_self.pop_back(); + + /// We don't have root's frame so framers are empty in the end. + if (!frames.empty()) + frames.pop_back(); + + continue; + } + + TreeNode * current = nodes.back()->child; + nodes.back() = nodes.back()->next; + + bool enough_bytes = current->allocated >= min_bytes; + bool enough_depth = max_depth == 0 || nodes.size() < max_depth; + + if (enough_bytes) + { + frames.push_back(current->ptr); + allocated_self.back() -= current->allocated; + + if (enough_depth) + { + allocated_total.push_back(current->allocated); + allocated_self.push_back(current->allocated); + nodes.push_back(current->children); + } + else + { + traces.push_back({frames, current->allocated, current->allocated}); + frames.pop_back(); + } + } + } + + return traces; + } +}; + +static void insertData(DB::PaddedPODArray & chars, DB::PaddedPODArray & offsets, const char * pos, size_t length) +{ + const size_t old_size = chars.size(); + const size_t new_size = old_size + length + 1; + + chars.resize(new_size); + if (length) + memcpy(chars.data() + old_size, pos, length); + chars[old_size + length] = 0; + offsets.push_back(new_size); +} + +/// Split str by line feed and write as separate row to ColumnString. 
+static void fillColumn(DB::PaddedPODArray & chars, DB::PaddedPODArray & offsets, const std::string & str) +{ + size_t start = 0; + size_t end = 0; + size_t size = str.size(); + + while (end < size) + { + if (str[end] == '\n') + { + insertData(chars, offsets, str.data() + start, end - start); + start = end + 1; + } + + ++end; + } + + if (start < end) + insertData(chars, offsets, str.data() + start, end - start); +} + +void dumpFlameGraph( + const AggregateFunctionFlameGraphTree::Traces & traces, + DB::PaddedPODArray & chars, + DB::PaddedPODArray & offsets) +{ + DB::WriteBufferFromOwnString out; + + std::unordered_map mapping; + +#if defined(__ELF__) && !defined(OS_FREEBSD) + auto symbol_index_ptr = DB::SymbolIndex::instance(); + const DB::SymbolIndex & symbol_index = *symbol_index_ptr; +#endif + + for (const auto & trace : traces) + { + if (trace.allocated_self == 0) + continue; + + for (size_t i = 0; i < trace.frames.size(); ++i) + { + if (i) + out << ";"; + + const void * ptr = reinterpret_cast(trace.frames[i]); + +#if defined(__ELF__) && !defined(OS_FREEBSD) + if (const auto * symbol = symbol_index.findSymbol(ptr)) + writeString(demangle(symbol->name), out); + else + DB::writePointerHex(ptr, out); +#else + DB::writePointerHex(ptr, out); +#endif + } + + out << ' ' << trace.allocated_self << "\n"; + } + + fillColumn(chars, offsets, out.str()); +} + +struct AggregateFunctionFlameGraphData +{ + struct Entry + { + AggregateFunctionFlameGraphTree::TreeNode * trace; + UInt64 size; + Entry * next = nullptr; + }; + + struct Pair + { + Entry * allocation = nullptr; + Entry * deallocation = nullptr; + }; + + using Entries = HashMap; + + AggregateFunctionFlameGraphTree tree; + Entries entries; + Entry * free_list = nullptr; + + Entry * alloc(Arena * arena) + { + if (free_list) + { + auto * res = free_list; + free_list = free_list->next; + return res; + } + + return reinterpret_cast(arena->alloc(sizeof(Entry))); + } + + void release(Entry * entry) + { + entry->next = free_list; + free_list = entry; + } + + static void track(Entry * allocation) + { + auto * node = allocation->trace; + while (node) + { + node->allocated += allocation->size; + node = node->parent; + } + } + + static void untrack(Entry * allocation) + { + auto * node = allocation->trace; + while (node) + { + node->allocated -= allocation->size; + node = node->parent; + } + } + + static Entry * tryFindMatchAndRemove(Entry *& list, UInt64 size) + { + if (!list) + return nullptr; + + if (list->size == size) + { + Entry * entry = list; + list = list->next; + return entry; + } + else + { + Entry * parent = list; + while (parent->next && parent->next->size != size) + parent = parent->next; + + if (parent->next && parent->next->size == size) + { + Entry * entry = parent->next; + parent->next = entry->next; + return entry; + } + + return nullptr; + } + } + + void add(UInt64 ptr, Int64 size, const UInt64 * stack, size_t stack_size, Arena * arena) + { + /// In case if argument is nullptr, only track allocations. 
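+    /// For non-null pointers (handled below): a positive `size` records an allocation and a negative
+    /// `size` a deallocation. An allocation and a deallocation with the same ptr and size cancel each
+    /// other out (whichever arrives first stays pending until its pair shows up), so only allocations
+    /// that were never freed remain accounted in the tree.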
+ if (ptr == 0) + { + if (size > 0) + { + auto * node = tree.find(stack, stack_size, arena); + Entry entry{.trace = node, .size = UInt64(size)}; + track(&entry); + } + + return; + } + + auto & place = entries[ptr]; + if (size > 0) + { + if (auto * deallocation = tryFindMatchAndRemove(place.deallocation, size)) + { + release(deallocation); + } + else + { + auto * node = tree.find(stack, stack_size, arena); + + auto * allocation = alloc(arena); + allocation->size = UInt64(size); + allocation->trace = node; + + track(allocation); + + allocation->next = place.allocation; + place.allocation = allocation; + } + } + else if (size < 0) + { + UInt64 abs_size = -size; + if (auto * allocation = tryFindMatchAndRemove(place.allocation, abs_size)) + { + untrack(allocation); + release(allocation); + } + else + { + auto * deallocation = alloc(arena); + deallocation->size = abs_size; + + deallocation->next = place.deallocation; + place.deallocation = deallocation; + } + } + } + + void merge(const AggregateFunctionFlameGraphTree & other_tree, Arena * arena) + { + AggregateFunctionFlameGraphTree::Trace::Frames frames; + std::vector nodes; + + nodes.push_back(other_tree.root.children); + + while (!nodes.empty()) + { + if (nodes.back() == nullptr) + { + nodes.pop_back(); + + /// We don't have root's frame so framers are empty in the end. + if (!frames.empty()) + frames.pop_back(); + + continue; + } + + AggregateFunctionFlameGraphTree::TreeNode * current = nodes.back()->child; + nodes.back() = nodes.back()->next; + + frames.push_back(current->ptr); + + if (current->children) + nodes.push_back(current->children); + else + { + if (current->allocated) + add(0, current->allocated, frames.data(), frames.size(), arena); + + frames.pop_back(); + } + } + } + + void merge(const AggregateFunctionFlameGraphData & other, Arena * arena) + { + AggregateFunctionFlameGraphTree::Trace::Frames frames; + for (const auto & entry : other.entries) + { + for (auto * allocation = entry.value.second.allocation; allocation; allocation = allocation->next) + { + frames.clear(); + const auto * node = allocation->trace; + while (node->ptr) + { + frames.push_back(node->ptr); + node = node->parent; + } + + std::reverse(frames.begin(), frames.end()); + add(entry.value.first, allocation->size, frames.data(), frames.size(), arena); + untrack(allocation); + } + + for (auto * deallocation = entry.value.second.deallocation; deallocation; deallocation = deallocation->next) + { + add(entry.value.first, -Int64(deallocation->size), nullptr, 0, arena); + } + } + + merge(other.tree, arena); + } + + void dumpFlameGraph( + DB::PaddedPODArray & chars, + DB::PaddedPODArray & offsets, + size_t max_depth, size_t min_bytes) const + { + DB::dumpFlameGraph(tree.dump(max_depth, min_bytes), chars, offsets); + } +}; + +/// Aggregate function which builds a flamegraph using the list of stacktraces. +/// The output is an array of strings which can be used by flamegraph.pl util. +/// See https://github.com/brendangregg/FlameGraph +/// +/// Syntax: flameGraph(traces, [size = 1], [ptr = 0]) +/// - trace : Array(UInt64), a stacktrace +/// - size : Int64, an allocation size (for memory profiling) +/// - ptr : UInt64, an allocation address +/// In case if ptr != 0, a flameGraph will map allocations (size > 0) and deallocations (size < 0) with the same size and ptr. +/// Only allocations which were not freed are shown. Not mapped deallocations are ignored. 
+/// +/// Usage: +/// +/// * Build a flamegraph based on CPU query profiler +/// set query_profiler_cpu_time_period_ns=10000000; +/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +/// clickhouse client --allow_introspection_functions=1 +/// -q "select arrayJoin(flameGraph(arrayReverse(trace))) from system.trace_log where trace_type = 'CPU' and query_id = 'xxx'" +/// | ~/dev/FlameGraph/flamegraph.pl > flame_cpu.svg +/// +/// * Build a flamegraph based on memory query profiler, showing all allocations +/// set memory_profiler_sample_probability=1, max_untracked_memory=1; +/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +/// clickhouse client --allow_introspection_functions=1 +/// -q "select arrayJoin(flameGraph(trace, size)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'" +/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem.svg +/// +/// * Build a flamegraph based on memory query profiler, showing allocations which were not deallocated in query context +/// set memory_profiler_sample_probability=1, max_untracked_memory=1, use_uncompressed_cache=1, merge_tree_max_rows_to_use_cache=100000000000, merge_tree_max_bytes_to_use_cache=1000000000000; +/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +/// clickhouse client --allow_introspection_functions=1 +/// -q "select arrayJoin(flameGraph(trace, size, ptr)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'" +/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_untracked.svg +/// +/// * Build a flamegraph based on memory query profiler, showing active allocations at the fixed point of time +/// set memory_profiler_sample_probability=1, max_untracked_memory=1; +/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +/// 1. Memory usage per second +/// select event_time, m, formatReadableSize(max(s) as m) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample') group by event_time order by event_time; +/// 2. Find a time point with maximal memory usage +/// select argMax(event_time, s), max(s) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample'); +/// 3. Fix active allocations at fixed point of time +/// clickhouse client --allow_introspection_functions=1 +/// -q "select arrayJoin(flameGraph(trace, size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time <= 'yyy' order by event_time)" +/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_pos.svg +/// 4. 
Find deallocations at fixed point of time +/// clickhouse client --allow_introspection_functions=1 +/// -q "select arrayJoin(flameGraph(trace, -size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time > 'yyy' order by event_time desc)" +/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_neg.svg +class AggregateFunctionFlameGraph final : public IAggregateFunctionDataHelper +{ +public: + explicit AggregateFunctionFlameGraph(const DataTypes & argument_types_) + : IAggregateFunctionDataHelper(argument_types_, {}) + {} + + String getName() const override { return "flameGraph"; } + + DataTypePtr getReturnType() const override + { + return std::make_shared(std::make_shared()); + } + + bool allocatesMemoryInArena() const override { return true; } + + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override + { + const auto * trace = typeid_cast(columns[0]); + + const auto & trace_offsets = trace->getOffsets(); + const auto & trace_values = typeid_cast(&trace->getData())->getData(); + UInt64 prev_offset = 0; + if (row_num) + prev_offset = trace_offsets[row_num - 1]; + UInt64 trace_size = trace_offsets[row_num] - prev_offset; + + Int64 allocated = 1; + if (argument_types.size() >= 2) + { + const auto & sizes = typeid_cast(columns[1])->getData(); + allocated = sizes[row_num]; + } + + UInt64 ptr = 0; + if (argument_types.size() >= 3) + { + const auto & ptrs = typeid_cast(columns[2])->getData(); + ptr = ptrs[row_num]; + } + + this->data(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena); + } + + void addManyDefaults( + AggregateDataPtr __restrict /*place*/, + const IColumn ** /*columns*/, + size_t /*length*/, + Arena * /*arena*/) const override + { + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override + { + this->data(place).merge(this->data(rhs), arena); + } + + void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional /* version */) const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Serialization for function flameGraph is not implemented."); + } + + void deserialize(AggregateDataPtr __restrict, ReadBuffer &, std::optional /* version */, Arena *) const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Deserialization for function flameGraph is not implemented."); + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override + { + auto & array = assert_cast(to); + auto & str = assert_cast(array.getData()); + + this->data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0); + + array.getOffsets().push_back(str.size()); + } +}; + +static void check(const std::string & name, const DataTypes & argument_types, const Array & params) +{ + assertNoParameters(name, params); + + if (argument_types.empty() || argument_types.size() > 3) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Aggregate function {} requires 1 to 3 arguments : trace, [size = 1], [ptr = 0]", + name); + + auto ptr_type = std::make_shared(); + auto trace_type = std::make_shared(ptr_type); + auto size_type = std::make_shared(); + + if (!argument_types[0]->equals(*trace_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument (trace) for function {} must be Array(UInt64), but it has type {}", + name, argument_types[0]->getName()); + + if (argument_types.size() 
>= 2 && !argument_types[1]->equals(*size_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second argument (size) for function {} must be Int64, but it has type {}", + name, argument_types[1]->getName()); + + if (argument_types.size() >= 3 && !argument_types[2]->equals(*ptr_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Third argument (ptr) for function {} must be UInt64, but it has type {}", + name, argument_types[2]->getName()); +} + +AggregateFunctionPtr createAggregateFunctionFlameGraph(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings * settings) +{ + if (!settings->allow_introspection_functions) + throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, + "Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0"); + + check(name, argument_types, params); + return std::make_shared(argument_types); +} + +void registerAggregateFunctionFlameGraph(AggregateFunctionFactory & factory) +{ + AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true }; + + factory.registerFunction("flameGraph", { createAggregateFunctionFlameGraph, properties }); +} + +} diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index ecf6ab51367..08524cc9f97 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -73,6 +73,7 @@ void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory void registerAggregateFunctionSparkbar(AggregateFunctionFactory &); void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &); void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &); +void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &); class AggregateFunctionCombinatorFactory; void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &); @@ -158,6 +159,7 @@ void registerAggregateFunctions() registerAggregateFunctionExponentialMovingAverage(factory); registerAggregateFunctionSparkbar(factory); registerAggregateFunctionAnalysisOfVariance(factory); + registerAggregateFunctionFlameGraph(factory); registerWindowFunctions(factory); } diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 654651e34be..6bc3c86edf0 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -179,15 +179,6 @@ void BackupWriterS3::copyObjectImpl( } -Aws::S3::Model::HeadObjectOutcome BackupWriterS3::requestObjectHeadData(const std::string & bucket_from, const std::string & key) const -{ - Aws::S3::Model::HeadObjectRequest request; - request.SetBucket(bucket_from); - request.SetKey(key); - - return client->HeadObject(request); -} - void BackupWriterS3::copyObjectMultipartImpl( const String & src_bucket, const String & src_key, @@ -310,7 +301,7 @@ void BackupWriterS3::copyFileNative(DiskPtr from_disk, const String & file_name_ std::string source_bucket = object_storage->getObjectsNamespace(); auto file_path = fs::path(s3_uri.key) / file_name_to; - auto head = requestObjectHeadData(source_bucket, objects[0].absolute_path).GetResult(); + auto head = S3::headObject(*client, source_bucket, objects[0].absolute_path).GetResult(); if (static_cast(head.GetContentLength()) < request_settings.max_single_operation_copy_size) { copyObjectImpl( diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index 
128bea28642..634b35c1e74 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -62,8 +62,6 @@ public: void copyFileNative(DiskPtr from_disk, const String & file_name_from, const String & file_name_to) override; private: - Aws::S3::Model::HeadObjectOutcome requestObjectHeadData(const std::string & bucket_from, const std::string & key) const; - void copyObjectImpl( const String & src_bucket, const String & src_key, diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bd8b221e2ba..99c3c0c3fa2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -106,6 +106,7 @@ if (TARGET ch_contrib::nats_io) endif() add_headers_and_sources(dbms Storages/MeiliSearch) +add_headers_and_sources(dbms Storages/NamedCollections) if (TARGET ch_contrib::amqp_cpp) add_headers_and_sources(dbms Storages/RabbitMQ) diff --git a/src/Common/AllocationTrace.h b/src/Common/AllocationTrace.h new file mode 100644 index 00000000000..332808c8015 --- /dev/null +++ b/src/Common/AllocationTrace.h @@ -0,0 +1,16 @@ +#pragma once +#include + +/// This is a structure which is returned by MemoryTracker. +/// Methods onAlloc/onFree should be called after actual memory allocation if it succeed. +/// For now, it will only collect allocation trace with sample_probability. +struct AllocationTrace +{ + AllocationTrace() = default; + explicit AllocationTrace(double sample_probability_); + + void onAlloc(void * ptr, size_t size) const; + void onFree(void * ptr, size_t size) const; + + double sample_probability = 0; +}; diff --git a/src/Common/Allocator.h b/src/Common/Allocator.h index c348eaea006..8c4f2ef1690 100644 --- a/src/Common/Allocator.h +++ b/src/Common/Allocator.h @@ -92,8 +92,10 @@ public: void * alloc(size_t size, size_t alignment = 0) { checkSize(size); - CurrentMemoryTracker::alloc(size); - return allocNoTrack(size, alignment); + auto trace = CurrentMemoryTracker::alloc(size); + void * ptr = allocNoTrack(size, alignment); + trace.onAlloc(ptr, size); + return ptr; } /// Free memory range. @@ -103,7 +105,8 @@ public: { checkSize(size); freeNoTrack(buf, size); - CurrentMemoryTracker::free(size); + auto trace = CurrentMemoryTracker::free(size); + trace.onFree(buf, size); } catch (...) { @@ -129,13 +132,16 @@ public: && alignment <= MALLOC_MIN_ALIGNMENT) { /// Resize malloc'd memory region with no special alignment requirement. - CurrentMemoryTracker::realloc(old_size, new_size); + auto trace = CurrentMemoryTracker::realloc(old_size, new_size); + trace.onFree(buf, old_size); void * new_buf = ::realloc(buf, new_size); if (nullptr == new_buf) DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); buf = new_buf; + trace.onAlloc(buf, new_size); + if constexpr (clear_memory) if (new_size > old_size) memset(reinterpret_cast(buf) + old_size, 0, new_size - old_size); @@ -143,7 +149,8 @@ public: else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD) { /// Resize mmap'd memory region. 
- CurrentMemoryTracker::realloc(old_size, new_size); + auto trace = CurrentMemoryTracker::realloc(old_size, new_size); + trace.onFree(buf, old_size); // On apple and freebsd self-implemented mremap used (common/mremap.h) buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, @@ -152,14 +159,17 @@ public: DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP); + trace.onAlloc(buf, new_size); /// No need for zero-fill, because mmap guarantees it. } else if (new_size < MMAP_THRESHOLD) { /// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once. - CurrentMemoryTracker::realloc(old_size, new_size); + auto trace = CurrentMemoryTracker::realloc(old_size, new_size); + trace.onFree(buf, old_size); void * new_buf = allocNoTrack(new_size, alignment); + trace.onAlloc(new_buf, new_size); memcpy(new_buf, buf, std::min(old_size, new_size)); freeNoTrack(buf, old_size); buf = new_buf; diff --git a/src/Common/AllocatorWithMemoryTracking.h b/src/Common/AllocatorWithMemoryTracking.h index 815c326ed62..b43870e05b2 100644 --- a/src/Common/AllocatorWithMemoryTracking.h +++ b/src/Common/AllocatorWithMemoryTracking.h @@ -30,21 +30,24 @@ struct AllocatorWithMemoryTracking throw std::bad_alloc(); size_t bytes = n * sizeof(T); - CurrentMemoryTracker::alloc(bytes); + auto trace = CurrentMemoryTracker::alloc(bytes); T * p = static_cast(malloc(bytes)); if (!p) throw std::bad_alloc(); + trace.onAlloc(p, bytes); + return p; } void deallocate(T * p, size_t n) noexcept { - free(p); - size_t bytes = n * sizeof(T); - CurrentMemoryTracker::free(bytes); + + free(p); + auto trace = CurrentMemoryTracker::free(bytes); + trace.onFree(p, bytes); } }; diff --git a/src/Common/CurrentMemoryTracker.cpp b/src/Common/CurrentMemoryTracker.cpp index 720df07efb9..0147a095185 100644 --- a/src/Common/CurrentMemoryTracker.cpp +++ b/src/Common/CurrentMemoryTracker.cpp @@ -37,7 +37,7 @@ MemoryTracker * getMemoryTracker() using DB::current_thread; -void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) +AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) { #ifdef MEMORY_TRACKER_DEBUG_CHECKS if (unlikely(memory_tracker_always_throw_logical_error_on_allocation)) @@ -55,8 +55,9 @@ void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) if (will_be > current_thread->untracked_memory_limit) { - memory_tracker->allocImpl(will_be, throw_if_memory_exceeded); + auto res = memory_tracker->allocImpl(will_be, throw_if_memory_exceeded); current_thread->untracked_memory = 0; + return res; } else { @@ -68,36 +69,40 @@ void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) /// total_memory_tracker only, ignore untracked_memory else { - memory_tracker->allocImpl(size, throw_if_memory_exceeded); + return memory_tracker->allocImpl(size, throw_if_memory_exceeded); } + + return AllocationTrace(memory_tracker->getSampleProbability()); } + + return AllocationTrace(0); } void CurrentMemoryTracker::check() { if (auto * memory_tracker = getMemoryTracker()) - memory_tracker->allocImpl(0, true); + std::ignore = memory_tracker->allocImpl(0, true); } -void CurrentMemoryTracker::alloc(Int64 size) +AllocationTrace CurrentMemoryTracker::alloc(Int64 size) { bool throw_if_memory_exceeded = true; - allocImpl(size, throw_if_memory_exceeded); + return allocImpl(size, throw_if_memory_exceeded); } -void 
CurrentMemoryTracker::allocNoThrow(Int64 size) +AllocationTrace CurrentMemoryTracker::allocNoThrow(Int64 size) { bool throw_if_memory_exceeded = false; - allocImpl(size, throw_if_memory_exceeded); + return allocImpl(size, throw_if_memory_exceeded); } -void CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size) +AllocationTrace CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size) { Int64 addition = new_size - old_size; - addition > 0 ? alloc(addition) : free(-addition); + return addition > 0 ? alloc(addition) : free(-addition); } -void CurrentMemoryTracker::free(Int64 size) +AllocationTrace CurrentMemoryTracker::free(Int64 size) { if (auto * memory_tracker = getMemoryTracker()) { @@ -106,15 +111,20 @@ void CurrentMemoryTracker::free(Int64 size) current_thread->untracked_memory -= size; if (current_thread->untracked_memory < -current_thread->untracked_memory_limit) { - memory_tracker->free(-current_thread->untracked_memory); + Int64 untracked_memory = current_thread->untracked_memory; current_thread->untracked_memory = 0; + return memory_tracker->free(-untracked_memory); } } /// total_memory_tracker only, ignore untracked_memory else { - memory_tracker->free(size); + return memory_tracker->free(size); } + + return AllocationTrace(memory_tracker->getSampleProbability()); } + + return AllocationTrace(0); } diff --git a/src/Common/CurrentMemoryTracker.h b/src/Common/CurrentMemoryTracker.h index e125e4cbe4a..ba46f458e4a 100644 --- a/src/Common/CurrentMemoryTracker.h +++ b/src/Common/CurrentMemoryTracker.h @@ -1,19 +1,20 @@ #pragma once #include +#include /// Convenience methods, that use current thread's memory_tracker if it is available. struct CurrentMemoryTracker { /// Call the following functions before calling of corresponding operations with memory allocators. - static void alloc(Int64 size); - static void allocNoThrow(Int64 size); - static void realloc(Int64 old_size, Int64 new_size); + [[nodiscard]] static AllocationTrace alloc(Int64 size); + [[nodiscard]] static AllocationTrace allocNoThrow(Int64 size); + [[nodiscard]] static AllocationTrace realloc(Int64 old_size, Int64 new_size); /// This function should be called after memory deallocation. - static void free(Int64 size); + [[nodiscard]] static AllocationTrace free(Int64 size); static void check(); private: - static void allocImpl(Int64 size, bool throw_if_memory_exceeded); + [[nodiscard]] static AllocationTrace allocImpl(Int64 size, bool throw_if_memory_exceeded); }; diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 6878533c2fd..5e692101354 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -103,6 +103,7 @@ M(S3Requests, "S3 requests") \ M(KeeperAliveConnections, "Number of alive connections") \ M(KeeperOutstandingRequets, "Number of outstanding requests") \ + M(ThreadsInOvercommitTracker, "Number of waiting threads inside of OvercommitTracker") \ namespace CurrentMetrics { diff --git a/src/Common/FiberStack.h b/src/Common/FiberStack.h index c55608311d0..91bb632d807 100644 --- a/src/Common/FiberStack.h +++ b/src/Common/FiberStack.h @@ -57,7 +57,8 @@ public: } /// Do not count guard page in memory usage. - CurrentMemoryTracker::alloc(num_pages * page_size); + auto trace = CurrentMemoryTracker::alloc(num_pages * page_size); + trace.onAlloc(vp, num_pages * page_size); boost::context::stack_context sctx; sctx.size = num_bytes; @@ -77,6 +78,7 @@ public: ::munmap(vp, sctx.size); /// Do not count guard page in memory usage. 
- CurrentMemoryTracker::free(sctx.size - page_size); + auto trace = CurrentMemoryTracker::free(sctx.size - page_size); + trace.onFree(vp, sctx.size - page_size); } }; diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 27d0adcf24f..d0d0d6b8686 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -1,6 +1,7 @@ #include "MemoryTracker.h" #include +#include #include #include #include @@ -82,6 +83,53 @@ inline std::string_view toDescription(OvercommitResult result) } } +bool shouldTrackAllocation(DB::Float64 probability, void * ptr) +{ + return sipHash64(uintptr_t(ptr)) < std::numeric_limits::max() * probability; +} + +AllocationTrace updateAllocationTrace(AllocationTrace trace, const std::optional & sample_probability) +{ + if (unlikely(sample_probability)) + return AllocationTrace(*sample_probability); + + return trace; +} + +AllocationTrace getAllocationTrace(std::optional & sample_probability) +{ + if (unlikely(sample_probability)) + return AllocationTrace(*sample_probability); + + return AllocationTrace(0); +} + +} + +AllocationTrace::AllocationTrace(double sample_probability_) : sample_probability(sample_probability_) {} + +void AllocationTrace::onAlloc(void * ptr, size_t size) const +{ + if (likely(sample_probability == 0)) + return; + + if (sample_probability < 1 && !shouldTrackAllocation(sample_probability, ptr)) + return; + + MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); + DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = Int64(size), .ptr = ptr}); +} + +void AllocationTrace::onFree(void * ptr, size_t size) const +{ + if (likely(sample_probability == 0)) + return; + + if (sample_probability < 1 && !shouldTrackAllocation(sample_probability, ptr)) + return; + + MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); + DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -Int64(size), .ptr = ptr}); } namespace ProfileEvents @@ -135,7 +183,7 @@ void MemoryTracker::logMemoryUsage(Int64 current) const } -void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker) +AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker) { if (size < 0) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Negative size ({}) is passed to MemoryTracker. It is a bug.", size); @@ -154,9 +202,14 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT /// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent. if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - loaded_next->allocImpl(size, throw_if_memory_exceeded, - level == VariableContext::Process ? this : query_tracker); - return; + { + MemoryTracker * tracker = level == VariableContext::Process ? 
this : query_tracker; + return updateAllocationTrace( + loaded_next->allocImpl(size, throw_if_memory_exceeded, tracker), + sample_probability); + } + + return getAllocationTrace(sample_probability); } /** Using memory_order_relaxed means that if allocations are done simultaneously, @@ -183,14 +236,6 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT allocation_traced = true; } - std::bernoulli_distribution sample(sample_probability); - if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) - { - MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); - DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = size}); - allocation_traced = true; - } - std::bernoulli_distribution fault(fault_probability); if (unlikely(fault_probability > 0.0 && fault(thread_local_rng))) { @@ -309,16 +354,22 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT } if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - loaded_next->allocImpl(size, throw_if_memory_exceeded, - level == VariableContext::Process ? this : query_tracker); + { + MemoryTracker * tracker = level == VariableContext::Process ? this : query_tracker; + return updateAllocationTrace( + loaded_next->allocImpl(size, throw_if_memory_exceeded, tracker), + sample_probability); + } + + return getAllocationTrace(sample_probability); } void MemoryTracker::adjustWithUntrackedMemory(Int64 untracked_memory) { if (untracked_memory > 0) - allocImpl(untracked_memory, /*throw_if_memory_exceeded*/ false); + std::ignore = allocImpl(untracked_memory, /*throw_if_memory_exceeded*/ false); else - free(-untracked_memory); + std::ignore = free(-untracked_memory); } bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage) @@ -337,8 +388,7 @@ bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage) return false; } - -void MemoryTracker::free(Int64 size) +AllocationTrace MemoryTracker::free(Int64 size) { if (MemoryTrackerBlockerInThread::isBlocked(level)) { @@ -353,15 +403,9 @@ void MemoryTracker::free(Int64 size) /// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent. 
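With the pointer available, sampling is no longer an independent coin flip per call (the std::bernoulli_distribution block is removed above) but a pure function of the address, so onAlloc and onFree reach the same verdict for the same allocation. A self-contained sketch of that idea, using std::hash as a stand-in for the sipHash64 used by shouldTrackAllocation (function name and the stand-in hash are illustrative):

    #include <cassert>
    #include <cstdint>
    #include <functional>
    #include <limits>

    /// Deterministic per-pointer sampling; the real code computes
    /// sipHash64(uintptr_t(ptr)) < numeric_limits<UInt64>::max() * probability.
    bool shouldSample(double probability, const void * ptr)
    {
        const auto hash = static_cast<double>(std::hash<const void *>{}(ptr));
        return hash < static_cast<double>(std::numeric_limits<uint64_t>::max()) * probability;
    }

    int main()
    {
        int dummy = 0;
        /// The same address always gives the same decision, so a sampled allocation
        /// is guaranteed to emit a matching deallocation sample as well.
        assert(shouldSample(0.5, &dummy) == shouldSample(0.5, &dummy));
        assert(!shouldSample(0.0, &dummy)); /// probability 0 samples nothing
    }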
if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - loaded_next->free(size); - return; - } + return updateAllocationTrace(loaded_next->free(size), sample_probability); - std::bernoulli_distribution sample(sample_probability); - if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) - { - MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); - DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size}); + return getAllocationTrace(sample_probability); } Int64 accounted_size = size; @@ -389,12 +433,15 @@ void MemoryTracker::free(Int64 size) if (auto * overcommit_tracker_ptr = overcommit_tracker.load(std::memory_order_relaxed)) overcommit_tracker_ptr->tryContinueQueryExecutionAfterFree(accounted_size); + AllocationTrace res = getAllocationTrace(sample_probability); if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - loaded_next->free(size); + res = updateAllocationTrace(loaded_next->free(size), sample_probability); auto metric_loaded = metric.load(std::memory_order_relaxed); if (metric_loaded != CurrentMetrics::end()) CurrentMetrics::sub(metric_loaded, accounted_size); + + return res; } @@ -478,3 +525,14 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value) while ((value == 0 || old_value < value) && !profiler_limit.compare_exchange_weak(old_value, value)) ; } + +double MemoryTracker::getSampleProbability() +{ + if (sample_probability) + return *sample_probability; + + if (auto * loaded_next = parent.load(std::memory_order_relaxed)) + return loaded_next->getSampleProbability(); + + return 0; +} diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index f6113d31423..e1f61b1585a 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -2,9 +2,11 @@ #include #include +#include #include #include #include +#include #if !defined(NDEBUG) #define MEMORY_TRACKER_DEBUG_CHECKS @@ -65,7 +67,7 @@ private: double fault_probability = 0; /// To randomly sample allocations and deallocations in trace_log. - double sample_probability = 0; + std::optional sample_probability; /// Singly-linked list. All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy). /// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker. @@ -90,8 +92,8 @@ private: /// allocImpl(...) and free(...) 
should not be used directly friend struct CurrentMemoryTracker; - void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr); - void free(Int64 size); + [[nodiscard]] AllocationTrace allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr); + [[nodiscard]] AllocationTrace free(Int64 size); public: static constexpr auto USAGE_EVENT_NAME = "MemoryTrackerUsage"; @@ -146,6 +148,8 @@ public: sample_probability = value; } + double getSampleProbability(); + void setProfilerStep(Int64 value) { profiler_step = value; diff --git a/src/Common/MemoryTrackerBlockerInThread.h b/src/Common/MemoryTrackerBlockerInThread.h index d3882056f54..73794049007 100644 --- a/src/Common/MemoryTrackerBlockerInThread.h +++ b/src/Common/MemoryTrackerBlockerInThread.h @@ -28,4 +28,5 @@ public: } friend class MemoryTracker; + friend struct AllocationTrace; }; diff --git a/src/Common/OvercommitTracker.cpp b/src/Common/OvercommitTracker.cpp index bb477d6019d..2f067b7c193 100644 --- a/src/Common/OvercommitTracker.cpp +++ b/src/Common/OvercommitTracker.cpp @@ -3,8 +3,13 @@ #include #include #include +#include #include +namespace CurrentMetrics +{ + extern const Metric ThreadsInOvercommitTracker; +} namespace ProfileEvents { @@ -32,6 +37,8 @@ OvercommitResult OvercommitTracker::needToStopQuery(MemoryTracker * tracker, Int if (OvercommitTrackerBlockerInThread::isBlocked()) return OvercommitResult::NONE; + + CurrentMetrics::Increment metric_increment(CurrentMetrics::ThreadsInOvercommitTracker); // NOTE: Do not change the order of locks // // global mutex must be acquired before overcommit_m, because diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index 071d4bb2f48..b049edcdcf7 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -123,16 +123,13 @@ void ProgressIndication::writeFinalProgress() if (progress.read_rows < 1000) return; - UInt64 processed_rows = progress.read_rows + progress.written_rows; - UInt64 processed_bytes = progress.read_bytes + progress.written_bytes; - - std::cout << "Processed " << formatReadableQuantity(processed_rows) << " rows, " - << formatReadableSizeWithDecimalSuffix(processed_bytes); + std::cout << "Processed " << formatReadableQuantity(progress.read_rows) << " rows, " + << formatReadableSizeWithDecimalSuffix(progress.read_bytes); UInt64 elapsed_ns = getElapsedNanoseconds(); if (elapsed_ns) - std::cout << " (" << formatReadableQuantity(processed_rows * 1000000000.0 / elapsed_ns) << " rows/s., " - << formatReadableSizeWithDecimalSuffix(processed_bytes * 1000000000.0 / elapsed_ns) << "/s.)"; + std::cout << " (" << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., " + << formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)"; else std::cout << ". 
"; } @@ -167,18 +164,16 @@ void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message) size_t prefix_size = message.count(); - UInt64 processed_rows = progress.read_rows + progress.written_rows; - UInt64 processed_bytes = progress.read_bytes + progress.written_bytes; message << indicator << " Progress: "; message - << formatReadableQuantity(processed_rows) << " rows, " - << formatReadableSizeWithDecimalSuffix(processed_bytes); + << formatReadableQuantity(progress.read_rows) << " rows, " + << formatReadableSizeWithDecimalSuffix(progress.read_bytes); UInt64 elapsed_ns = getElapsedNanoseconds(); if (elapsed_ns) message << " (" - << formatReadableQuantity(processed_rows * 1000000000.0 / elapsed_ns) << " rows/s., " - << formatReadableSizeWithDecimalSuffix(processed_bytes * 1000000000.0 / elapsed_ns) << "/s.) "; + << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., " + << formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.) "; else message << ". "; diff --git a/src/Common/TraceSender.cpp b/src/Common/TraceSender.cpp index 64d7b2b0eaf..91d07367a82 100644 --- a/src/Common/TraceSender.cpp +++ b/src/Common/TraceSender.cpp @@ -33,6 +33,7 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext + sizeof(TraceType) /// trace type + sizeof(UInt64) /// thread_id + sizeof(Int64) /// size + + sizeof(void *) /// ptr + sizeof(ProfileEvents::Event) /// event + sizeof(ProfileEvents::Count); /// increment @@ -74,6 +75,7 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext writePODBinary(trace_type, out); writePODBinary(thread_id, out); writePODBinary(extras.size, out); + writePODBinary(UInt64(extras.ptr), out); writePODBinary(extras.event, out); writePODBinary(extras.increment, out); diff --git a/src/Common/TraceSender.h b/src/Common/TraceSender.h index 21b44b651dd..68ba15ee400 100644 --- a/src/Common/TraceSender.h +++ b/src/Common/TraceSender.h @@ -28,8 +28,9 @@ class TraceSender public: struct Extras { - /// size - for memory tracing is the amount of memory allocated; for other trace types it is 0. + /// size, ptr - for memory tracing is the amount of memory allocated; for other trace types it is 0. Int64 size{}; + void * ptr = nullptr; /// Event type and increment for 'ProfileEvent' trace type; for other trace types defaults. 
ProfileEvents::Event event{ProfileEvents::end()}; ProfileEvents::Count increment{}; diff --git a/src/Common/clickhouse_malloc.cpp b/src/Common/clickhouse_malloc.cpp index 3f69ebdf58d..afdad3c6599 100644 --- a/src/Common/clickhouse_malloc.cpp +++ b/src/Common/clickhouse_malloc.cpp @@ -9,7 +9,11 @@ extern "C" void * clickhouse_malloc(size_t size) { void * res = malloc(size); if (res) - Memory::trackMemory(size); + { + AllocationTrace trace; + size_t actual_size = Memory::trackMemory(size, trace); + trace.onAlloc(res, actual_size); + } return res; } @@ -17,17 +21,29 @@ extern "C" void * clickhouse_calloc(size_t number_of_members, size_t size) { void * res = calloc(number_of_members, size); if (res) - Memory::trackMemory(number_of_members * size); + { + AllocationTrace trace; + size_t actual_size = Memory::trackMemory(number_of_members * size, trace); + trace.onAlloc(res, actual_size); + } return res; } extern "C" void * clickhouse_realloc(void * ptr, size_t size) { if (ptr) - Memory::untrackMemory(ptr); + { + AllocationTrace trace; + size_t actual_size = Memory::untrackMemory(ptr, trace); + trace.onFree(ptr, actual_size); + } void * res = realloc(ptr, size); if (res) - Memory::trackMemory(size); + { + AllocationTrace trace; + size_t actual_size = Memory::trackMemory(size, trace); + trace.onAlloc(res, actual_size); + } return res; } @@ -42,7 +58,9 @@ extern "C" void * clickhouse_reallocarray(void * ptr, size_t number_of_members, extern "C" void clickhouse_free(void * ptr) { - Memory::untrackMemory(ptr); + AllocationTrace trace; + size_t actual_size = Memory::untrackMemory(ptr, trace); + trace.onFree(ptr, actual_size); free(ptr); } @@ -50,6 +68,10 @@ extern "C" int clickhouse_posix_memalign(void ** memptr, size_t alignment, size_ { int res = posix_memalign(memptr, alignment, size); if (res == 0) - Memory::trackMemory(size); + { + AllocationTrace trace; + size_t actual_size = Memory::trackMemory(size, trace); + trace.onAlloc(*memptr, actual_size); + } return res; } diff --git a/src/Common/memory.h b/src/Common/memory.h index 4cb1c535e56..87ccdce070a 100644 --- a/src/Common/memory.h +++ b/src/Common/memory.h @@ -112,16 +112,19 @@ inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size, TAlign... align template ... TAlign> requires DB::OptionalArgument -inline ALWAYS_INLINE void trackMemory(std::size_t size, TAlign... align) +inline ALWAYS_INLINE size_t trackMemory(std::size_t size, AllocationTrace & trace, TAlign... align) { std::size_t actual_size = getActualAllocationSize(size, align...); - CurrentMemoryTracker::allocNoThrow(actual_size); + trace = CurrentMemoryTracker::allocNoThrow(actual_size); + return actual_size; } template ... TAlign> requires DB::OptionalArgument -inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0, TAlign... align [[maybe_unused]]) noexcept +inline ALWAYS_INLINE size_t untrackMemory(void * ptr [[maybe_unused]], AllocationTrace & trace, std::size_t size [[maybe_unused]] = 0, TAlign... 
align [[maybe_unused]]) noexcept { + std::size_t actual_size = 0; + try { #if USE_JEMALLOC @@ -130,23 +133,26 @@ inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t if (likely(ptr != nullptr)) { if constexpr (sizeof...(TAlign) == 1) - CurrentMemoryTracker::free(sallocx(ptr, MALLOCX_ALIGN(alignToSizeT(align...)))); + actual_size = sallocx(ptr, MALLOCX_ALIGN(alignToSizeT(align...))); else - CurrentMemoryTracker::free(sallocx(ptr, 0)); + actual_size = sallocx(ptr, 0); } #else if (size) - CurrentMemoryTracker::free(size); + actual_size = size; # if defined(_GNU_SOURCE) /// It's innaccurate resource free for sanitizers. malloc_usable_size() result is greater or equal to allocated size. else - CurrentMemoryTracker::free(malloc_usable_size(ptr)); + actual_size = malloc_usable_size(ptr); # endif #endif + trace = CurrentMemoryTracker::free(actual_size); } catch (...) { } + + return actual_size; } } diff --git a/src/Common/new_delete.cpp b/src/Common/new_delete.cpp index 871ab750907..d0170bd820c 100644 --- a/src/Common/new_delete.cpp +++ b/src/Common/new_delete.cpp @@ -50,50 +50,74 @@ static struct InitializeJemallocZoneAllocatorForOSX void * operator new(std::size_t size) { - Memory::trackMemory(size); - return Memory::newImpl(size); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace); + void * ptr = Memory::newImpl(size); + trace.onAlloc(ptr, actual_size); + return ptr; } void * operator new(std::size_t size, std::align_val_t align) { - Memory::trackMemory(size, align); - return Memory::newImpl(size, align); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace, align); + void * ptr = Memory::newImpl(size, align); + trace.onAlloc(ptr, actual_size); + return ptr; } void * operator new[](std::size_t size) { - Memory::trackMemory(size); - return Memory::newImpl(size); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace); + void * ptr = Memory::newImpl(size); + trace.onAlloc(ptr, actual_size); + return ptr; } void * operator new[](std::size_t size, std::align_val_t align) { - Memory::trackMemory(size, align); - return Memory::newImpl(size, align); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace, align); + void * ptr = Memory::newImpl(size, align); + trace.onAlloc(ptr, actual_size); + return ptr; } void * operator new(std::size_t size, const std::nothrow_t &) noexcept { - Memory::trackMemory(size); - return Memory::newNoExept(size); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace); + void * ptr = Memory::newNoExept(size); + trace.onAlloc(ptr, actual_size); + return ptr; } void * operator new[](std::size_t size, const std::nothrow_t &) noexcept { - Memory::trackMemory(size); - return Memory::newNoExept(size); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace); + void * ptr = Memory::newNoExept(size); + trace.onAlloc(ptr, actual_size); + return ptr; } void * operator new(std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept { - Memory::trackMemory(size, align); - return Memory::newNoExept(size, align); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace, align); + void * ptr = Memory::newNoExept(size, align); + trace.onAlloc(ptr, actual_size); + return ptr; } void * operator new[](std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept { - Memory::trackMemory(size, align); - return 
Memory::newNoExept(size, align); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace, align); + void * ptr = Memory::newNoExept(size, align); + trace.onAlloc(ptr, actual_size); + return ptr; } /// delete @@ -109,48 +133,64 @@ void * operator new[](std::size_t size, std::align_val_t align, const std::nothr void operator delete(void * ptr) noexcept { - Memory::untrackMemory(ptr); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace); + trace.onFree(ptr, actual_size); Memory::deleteImpl(ptr); } void operator delete(void * ptr, std::align_val_t align) noexcept { - Memory::untrackMemory(ptr, 0, align); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace, 0, align); + trace.onFree(ptr, actual_size); Memory::deleteImpl(ptr); } void operator delete[](void * ptr) noexcept { - Memory::untrackMemory(ptr); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace); + trace.onFree(ptr, actual_size); Memory::deleteImpl(ptr); } void operator delete[](void * ptr, std::align_val_t align) noexcept { - Memory::untrackMemory(ptr, 0, align); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace, 0, align); + trace.onFree(ptr, actual_size); Memory::deleteImpl(ptr); } void operator delete(void * ptr, std::size_t size) noexcept { - Memory::untrackMemory(ptr, size); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace, size); + trace.onFree(ptr, actual_size); Memory::deleteSized(ptr, size); } void operator delete(void * ptr, std::size_t size, std::align_val_t align) noexcept { - Memory::untrackMemory(ptr, size, align); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace, size, align); + trace.onFree(ptr, actual_size); Memory::deleteSized(ptr, size, align); } void operator delete[](void * ptr, std::size_t size) noexcept { - Memory::untrackMemory(ptr, size); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace, size); + trace.onFree(ptr, actual_size); Memory::deleteSized(ptr, size); } void operator delete[](void * ptr, std::size_t size, std::align_val_t align) noexcept { - Memory::untrackMemory(ptr, size, align); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace, size, align); + trace.onFree(ptr, actual_size); Memory::deleteSized(ptr, size, align); } diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 02451ac36de..bcb513157d7 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -149,31 +149,13 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa }; }; - const auto file_exists = [&](const auto & key) - { - Aws::S3::Model::HeadObjectRequest request; - request.SetBucket(s3_client->uri.bucket); - request.SetKey(key); - auto outcome = s3_client->client->HeadObject(request); - - if (outcome.IsSuccess()) - return true; - - const auto & error = outcome.GetError(); - if (error.GetErrorType() != Aws::S3::S3Errors::NO_SUCH_KEY && error.GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) - throw S3Exception(error.GetErrorType(), "Failed to verify existence of lock file: {}", error.GetMessage()); - - return false; - }; - - LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_path); ReadBufferFromFile snapshot_file(snapshot_path); auto snapshot_name = 
fs::path(snapshot_path).filename().string(); auto lock_file = fmt::format(".{}_LOCK", snapshot_name); - if (file_exists(snapshot_name)) + if (S3::objectExists(*s3_client->client, s3_client->uri.bucket, snapshot_name)) { LOG_ERROR(log, "Snapshot {} already exists", snapshot_name); return; @@ -181,7 +163,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa // First we need to verify that there isn't already a lock file for the snapshot we want to upload // Only leader uploads a snapshot, but there can be a rare case where we have 2 leaders in NuRaft - if (file_exists(lock_file)) + if (S3::objectExists(*s3_client->client, s3_client->uri.bucket, lock_file)) { LOG_ERROR(log, "Lock file for {} already, exists. Probably a different node is already uploading the snapshot", snapshot_name); return; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4d9e46e0543..3498d01fbc3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -767,7 +767,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, input_format_json_validate_types_from_metadata, true, "For JSON/JSONCompact/JSONColumnsWithMetadata input formats this controls whether format parser should check if data types from input metadata match data types of the corresponding columns from the table", 0) \ M(Bool, input_format_json_read_numbers_as_strings, false, "Allow to parse numbers as strings in JSON input formats", 0) \ M(Bool, input_format_json_read_objects_as_strings, false, "Allow to parse JSON objects as strings in JSON input formats", 0) \ - M(Bool, input_format_try_infer_integers, true, "Try to infer numbers from string fields while schema inference in text formats", 0) \ + M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \ diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 8a57c4bc9a1..7f494a694bd 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -525,7 +525,7 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB } /// Check if we have enough data in buffer to check if it's a null. 
- if (istr.available() > null_representation.size()) + if (settings.csv.custom_delimiter.empty() && istr.available() > null_representation.size()) { auto check_for_null = [&istr, &null_representation, &settings]() { @@ -550,8 +550,21 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB { buf.setCheckpoint(); SCOPE_EXIT(buf.dropCheckpoint()); - if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\r' || *buf.position() == '\n')) - return true; + if (checkString(null_representation, buf)) + { + if (!settings.csv.custom_delimiter.empty()) + { + if (checkString(settings.csv.custom_delimiter, buf)) + { + /// Rollback to the beginning of custom delimiter. + buf.rollbackToCheckpoint(); + assertString(null_representation, buf); + return true; + } + } + else if (buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\r' || *buf.position() == '\n') + return true; + } buf.rollbackToCheckpoint(); return false; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index ed7b8182622..a9996926408 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -66,12 +66,6 @@ namespace ErrorCodes namespace { -bool isNotFoundError(Aws::S3::S3Errors error) -{ - return error == Aws::S3::S3Errors::RESOURCE_NOT_FOUND - || error == Aws::S3::S3Errors::NO_SUCH_KEY; -} - template void throwIfError(const Aws::Utils::Outcome & response) { @@ -89,7 +83,7 @@ void throwIfUnexpectedError(const Aws::Utils::Outcome & response, /// the log will be polluted with error messages from aws sdk. /// Looks like there is no way to suppress them. - if (!response.IsSuccess() && (!if_exists || !isNotFoundError(response.GetError().GetErrorType()))) + if (!response.IsSuccess() && (!if_exists || !S3::isNotFoundError(response.GetError().GetErrorType()))) { const auto & err = response.GetError(); throw S3Exception(err.GetErrorType(), "{} (Code: {})", err.GetMessage(), static_cast(err.GetErrorType())); @@ -130,28 +124,12 @@ std::string S3ObjectStorage::generateBlobNameForPath(const std::string & /* path Aws::S3::Model::HeadObjectOutcome S3ObjectStorage::requestObjectHeadData(const std::string & bucket_from, const std::string & key) const { - auto client_ptr = client.get(); - - ProfileEvents::increment(ProfileEvents::S3HeadObject); - ProfileEvents::increment(ProfileEvents::DiskS3HeadObject); - Aws::S3::Model::HeadObjectRequest request; - request.SetBucket(bucket_from); - request.SetKey(key); - - return client_ptr->HeadObject(request); + return S3::headObject(*client.get(), bucket_from, key, "", true); } bool S3ObjectStorage::exists(const StoredObject & object) const { - auto object_head = requestObjectHeadData(bucket, object.absolute_path); - if (!object_head.IsSuccess()) - { - if (object_head.GetError().GetErrorType() == Aws::S3::S3Errors::RESOURCE_NOT_FOUND) - return false; - - throwIfError(object_head); - } - return true; + return S3::objectExists(*client.get(), bucket, object.absolute_path, "", true); } std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 98f9e486141..963213f31ad 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -119,6 +119,7 @@ struct FormatSettings char tuple_delimiter = ','; bool use_best_effort_in_schema_inference = true; UInt64 skip_first_lines = 0; + String 
custom_delimiter; } csv; struct HiveText diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 7e52c55e5b0..f66a193dea2 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -16,6 +16,7 @@ #include #include +#include #include @@ -94,7 +95,7 @@ void convertAnyColumnToBool(const IColumn * column, UInt8Container & res) } -template +template bool extractConstColumns(ColumnRawPtrs & in, UInt8 & res, Func && func) { bool has_res = false; @@ -112,7 +113,10 @@ bool extractConstColumns(ColumnRawPtrs & in, UInt8 & res, Func && func) if (has_res) { - res = Op::apply(res, x); + if constexpr (IsTernary) + res = Op::ternaryApply(res, x); + else + res = Op::apply(res, x); } else { @@ -129,7 +133,7 @@ bool extractConstColumns(ColumnRawPtrs & in, UInt8 & res, Func && func) template inline bool extractConstColumnsAsBool(ColumnRawPtrs & in, UInt8 & res) { - return extractConstColumns( + return extractConstColumns( in, res, [](const Field & value) { @@ -141,7 +145,7 @@ inline bool extractConstColumnsAsBool(ColumnRawPtrs & in, UInt8 & res) template inline bool extractConstColumnsAsTernary(ColumnRawPtrs & in, UInt8 & res_3v) { - return extractConstColumns( + return extractConstColumns( in, res_3v, [](const Field & value) { @@ -192,47 +196,74 @@ private: }; -/// A helper class used by AssociativeGenericApplierImpl -/// Allows for on-the-fly conversion of any data type into intermediate ternary representation -using TernaryValueGetter = std::function; - template -struct ValueGetterBuilderImpl; +struct TernaryValueBuilderImpl; template -struct ValueGetterBuilderImpl +struct TernaryValueBuilderImpl { - static TernaryValueGetter build(const IColumn * x) + static void build(const IColumn * x, UInt8* __restrict ternary_column_data) { + size_t size = x->size(); if (x->onlyNull()) { - return [](size_t){ return Ternary::Null; }; + memset(ternary_column_data, Ternary::Null, size); } else if (const auto * nullable_column = typeid_cast(x)) { if (const auto * nested_column = typeid_cast *>(nullable_column->getNestedColumnPtr().get())) { - return [ - &null_data = nullable_column->getNullMapData(), - &column_data = nested_column->getData()](size_t i) + const auto& null_data = nullable_column->getNullMapData(); + const auto& column_data = nested_column->getData(); + + if constexpr (sizeof(Type) == 1) { - return Ternary::makeValue(column_data[i], null_data[i]); - }; + for (size_t i = 0; i < size; ++i) + { + auto has_value = static_cast(column_data[i] != 0); + auto is_null = !!null_data[i]; + + ternary_column_data[i] = ((has_value << 1) | is_null) & (1 << !is_null); + } + } + else + { + for (size_t i = 0; i < size; ++i) + { + auto has_value = static_cast(column_data[i] != 0); + ternary_column_data[i] = has_value; + } + + for (size_t i = 0; i < size; ++i) + { + auto has_value = ternary_column_data[i]; + auto is_null = !!null_data[i]; + + ternary_column_data[i] = ((has_value << 1) | is_null) & (1 << !is_null); + } + } } else - return ValueGetterBuilderImpl::build(x); + TernaryValueBuilderImpl::build(x, ternary_column_data); } else if (const auto column = typeid_cast *>(x)) - return [&column_data = column->getData()](size_t i) { return Ternary::makeValue(column_data[i]); }; + { + auto &column_data = column->getData(); + + for (size_t i = 0; i < size; ++i) + { + ternary_column_data[i] = (column_data[i] != 0) << 1; + } + } else - return ValueGetterBuilderImpl::build(x); + TernaryValueBuilderImpl::build(x, ternary_column_data); } }; template <> -struct 
ValueGetterBuilderImpl<> +struct TernaryValueBuilderImpl<> { - static TernaryValueGetter build(const IColumn * x) + [[noreturn]] static void build(const IColumn * x, UInt8 * /* nullable_ternary_column_data */) { throw Exception( std::string("Unknown numeric column of type: ") + demangle(typeid(*x).name()), @@ -240,12 +271,12 @@ struct ValueGetterBuilderImpl<> } }; -using ValueGetterBuilder = - ValueGetterBuilderImpl; +using TernaryValueBuilder = + TernaryValueBuilderImpl; -/// This class together with helper class ValueGetterBuilder can be used with columns of arbitrary data type -/// Allows for on-the-fly conversion of any type of data into intermediate ternary representation -/// and eliminates the need to materialize data columns in intermediate representation +/// This class together with helper class TernaryValueBuilder can be used with columns of arbitrary data type +/// Converts column of any data type into an intermediate UInt8Column of ternary representation for the +/// vectorized ternary logic evaluation. template class AssociativeGenericApplierImpl { @@ -254,20 +285,19 @@ class AssociativeGenericApplierImpl public: /// Remembers the last N columns from `in`. explicit AssociativeGenericApplierImpl(const ColumnRawPtrs & in) - : val_getter{ValueGetterBuilder::build(in[in.size() - N])}, next{in} {} + : vec(in[in.size() - N]->size()), next{in} + { + TernaryValueBuilder::build(in[in.size() - N], vec.data()); + } /// Returns a combination of values in the i-th row of all columns stored in the constructor. inline ResultValueType apply(const size_t i) const { - const auto a = val_getter(i); - if constexpr (Op::isSaturable()) - return Op::isSaturatedValueTernary(a) ? a : Op::apply(a, next.apply(i)); - else - return Op::apply(a, next.apply(i)); + return Op::ternaryApply(vec[i], next.apply(i)); } private: - const TernaryValueGetter val_getter; + UInt8Container vec; const AssociativeGenericApplierImpl next; }; @@ -280,12 +310,15 @@ class AssociativeGenericApplierImpl public: /// Remembers the last N columns from `in`. explicit AssociativeGenericApplierImpl(const ColumnRawPtrs & in) - : val_getter{ValueGetterBuilder::build(in[in.size() - 1])} {} + : vec(UInt8Container(in[in.size() - 1]->size())) + { + TernaryValueBuilder::build(in[in.size() - 1], vec.data()); + } - inline ResultValueType apply(const size_t i) const { return val_getter(i); } + inline ResultValueType apply(const size_t i) const { return vec[i]; } private: - const TernaryValueGetter val_getter; + UInt8Container vec; }; @@ -318,7 +351,12 @@ struct OperationApplier for (size_t i = 0; i < size; ++i) { if constexpr (CarryResult) - result_data[i] = Op::apply(result_data[i], operation_applier_impl.apply(i)); + { + if constexpr (std::is_same_v, AssociativeApplierImpl>) + result_data[i] = Op::apply(result_data[i], operation_applier_impl.apply(i)); + else + result_data[i] = Op::ternaryApply(result_data[i], operation_applier_impl.apply(i)); + } else result_data[i] = operation_applier_impl.apply(i); } diff --git a/src/Functions/FunctionsLogical.h b/src/Functions/FunctionsLogical.h index 22471a151d2..30d8983b8cc 100644 --- a/src/Functions/FunctionsLogical.h +++ b/src/Functions/FunctionsLogical.h @@ -44,21 +44,29 @@ namespace Ternary { using ResultType = UInt8; - /** These carefully picked values magically work so bitwise "and", "or" on them - * corresponds to the expected results in three-valued logic. 
+ /** These values are carefully picked so that they could be efficiently evaluated with bitwise operations, which + * are feasible for auto-vectorization by the compiler. The expression for the ternary value evaluation writes: * - * False and True are represented by all-0 and all-1 bits, so all bitwise operations on them work as expected. - * Null is represented as single 1 bit. So, it is something in between False and True. - * And "or" works like maximum and "and" works like minimum: - * "or" keeps True as is and lifts False with Null to Null. - * "and" keeps False as is and downs True with Null to Null. + * ternary_value = ((value << 1) | is_null) & (1 << !is_null) + * + * The truth table of the above formula lists: + * +---------------+--------------+-------------+ + * | is_null\value | 0 | 1 | + * +---------------+--------------+-------------+ + * | 0 | 0b00 (False) | 0b10 (True) | + * | 1 | 0b01 (Null) | 0b01 (Null) | + * +---------------+--------------+-------------+ + * + * As the numerical values of False, Null and True are assigned in ascending order, the "and" and "or" of + * ternary logic could be implemented with minimum and maximum respectively, which are also vectorizable. + * https://en.wikipedia.org/wiki/Three-valued_logic * * This logic does not apply for "not" and "xor" - they work with default implementation for NULLs: * anything with NULL returns NULL, otherwise use conventional two-valued logic. */ - static constexpr UInt8 False = 0; /// All zero bits. - static constexpr UInt8 True = -1; /// All one bits. - static constexpr UInt8 Null = 1; /// Single one bit. + static constexpr UInt8 False = 0; /// 0b00 + static constexpr UInt8 Null = 1; /// 0b01 + static constexpr UInt8 True = 2; /// 0b10 template inline ResultType makeValue(T value) @@ -90,6 +98,8 @@ struct AndImpl static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a & b; } + static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::min(a, b); } + /// Will use three-valued logic for NULLs (see above) or default implementation (any operation with NULL returns NULL). 
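The new encoding can be checked mechanically: the formula quoted in the comment above yields exactly the three constants, and with False < Null < True the ternary AND/OR used by ternaryApply reduce to min/max. A self-contained check mirroring the constants and truth tables from this diff (plain standalone C++, not part of the patch):

    #include <algorithm>
    #include <cstdint>

    namespace Ternary
    {
        constexpr uint8_t False = 0; /// 0b00
        constexpr uint8_t Null = 1;  /// 0b01
        constexpr uint8_t True = 2;  /// 0b10
    }

    /// ternary_value = ((value << 1) | is_null) & (1 << !is_null)
    constexpr uint8_t makeTernary(uint8_t has_value, uint8_t is_null)
    {
        return ((has_value << 1) | is_null) & (1 << !is_null);
    }

    /// The encoding reproduces the truth table from the comment.
    static_assert(makeTernary(0, 0) == Ternary::False);
    static_assert(makeTernary(1, 0) == Ternary::True);
    static_assert(makeTernary(0, 1) == Ternary::Null);
    static_assert(makeTernary(1, 1) == Ternary::Null);

    /// With False < Null < True, three-valued AND is min and OR is max,
    /// e.g. True AND Null = Null, False AND Null = False, False OR Null = Null.
    static_assert(std::min(Ternary::True, Ternary::Null) == Ternary::Null);
    static_assert(std::min(Ternary::False, Ternary::Null) == Ternary::False);
    static_assert(std::max(Ternary::False, Ternary::Null) == Ternary::Null);
    static_assert(std::max(Ternary::True, Ternary::Null) == Ternary::True);

    int main() {}

Min and max are branchless and auto-vectorize, which is the design motivation the comment gives for moving away from the old bitwise encoding.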
static inline constexpr bool specialImplementationForNulls() { return true; } }; @@ -102,6 +112,7 @@ struct OrImpl static inline constexpr bool isSaturatedValue(bool a) { return a; } static inline constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::True; } static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a | b; } + static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::max(a, b); } static inline constexpr bool specialImplementationForNulls() { return true; } }; @@ -113,6 +124,7 @@ struct XorImpl static inline constexpr bool isSaturatedValue(bool) { return false; } static inline constexpr bool isSaturatedValueTernary(UInt8) { return false; } static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a != b; } + static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return a != b; } static inline constexpr bool specialImplementationForNulls() { return false; } #if USE_EMBEDDED_COMPILER diff --git a/src/Functions/tests/gtest_ternary_logic.cpp b/src/Functions/tests/gtest_ternary_logic.cpp new file mode 100644 index 00000000000..5ecafabb361 --- /dev/null +++ b/src/Functions/tests/gtest_ternary_logic.cpp @@ -0,0 +1,354 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// I know that inclusion of .cpp is not good at all +#include // NOLINT + +using namespace DB; +using TernaryValues = std::vector; + +struct LinearCongruentialGenerator +{ + /// Constants from `man lrand48_r`. + static constexpr UInt64 a = 0x5DEECE66D; + static constexpr UInt64 c = 0xB; + + /// And this is from `head -c8 /dev/urandom | xxd -p` + UInt64 current = 0x09826f4a081cee35ULL; + + UInt32 next() + { + current = current * a + c; + return static_cast(current >> 16); + } +}; + +void generateRandomTernaryValue(LinearCongruentialGenerator & gen, Ternary::ResultType * output, size_t size, double false_ratio, double null_ratio) +{ + /// The LinearCongruentialGenerator generates nonnegative integers uniformly distributed over the interval [0, 2^32). + /// See https://linux.die.net/man/3/nrand48 + + double false_percentile = false_ratio; + double null_percentile = false_ratio + null_ratio; + + false_percentile = false_percentile > 1 ? 1 : false_percentile; + null_percentile = null_percentile > 1 ? 1 : null_percentile; + + UInt32 false_threshold = static_cast(static_cast(std::numeric_limits::max()) * false_percentile); + UInt32 null_threshold = static_cast(static_cast(std::numeric_limits::max()) * null_percentile); + + for (Ternary::ResultType * end = output + size; output != end; ++output) + { + UInt32 val = gen.next(); + *output = val < false_threshold ? Ternary::False : (val < null_threshold ? 
Ternary::Null : Ternary::True); + } +} + +template +ColumnPtr createColumnNullable(const Ternary::ResultType * ternary_values, size_t size) +{ + auto nested_column = ColumnVector::create(size); + auto null_map = ColumnUInt8::create(size); + auto & nested_column_data = nested_column->getData(); + auto & null_map_data = null_map->getData(); + + for (size_t i = 0; i < size; ++i) + { + if (ternary_values[i] == Ternary::Null) + { + null_map_data[i] = 1; + nested_column_data[i] = 0; + } + else if (ternary_values[i] == Ternary::True) + { + null_map_data[i] = 0; + nested_column_data[i] = 100; + } + else + { + null_map_data[i] = 0; + nested_column_data[i] = 0; + } + } + + return ColumnNullable::create(std::move(nested_column), std::move(null_map)); +} + +template +ColumnPtr createColumnVector(const Ternary::ResultType * ternary_values, size_t size) +{ + auto column = ColumnVector::create(size); + auto & column_data = column->getData(); + + for (size_t i = 0; i < size; ++i) + { + if (ternary_values[i] == Ternary::True) + { + column_data[i] = 100; + } + else + { + column_data[i] = 0; + } + } + + return column; +} + +template +ColumnPtr createRandomColumn(LinearCongruentialGenerator & gen, TernaryValues & ternary_values) +{ + size_t size = ternary_values.size(); + Ternary::ResultType * ternary_data = ternary_values.data(); + + if constexpr (std::is_same_v) + { + generateRandomTernaryValue(gen, ternary_data, size, 0.3, 0.7); + return createColumnNullable(ternary_data, size); + } + else if constexpr (std::is_same_v>) + { + generateRandomTernaryValue(gen, ternary_data, size, 0.5, 0); + return createColumnVector(ternary_data, size); + } + else + { + auto nested_col = ColumnNothing::create(size); + auto null_map = ColumnUInt8::create(size); + + memset(ternary_data, Ternary::Null, size); + + return ColumnNullable::create(std::move(nested_col), std::move(null_map)); + } +} + +/* The truth table of ternary And and Or operations: + * +-------+-------+---------+--------+ + * | a | b | a And b | a Or b | + * +-------+-------+---------+--------+ + * | False | False | False | False | + * | False | Null | False | Null | + * | False | True | False | True | + * | Null | False | False | Null | + * | Null | Null | Null | Null | + * | Null | True | Null | True | + * | True | False | False | True | + * | True | Null | Null | True | + * | True | True | True | True | + * +-------+-------+---------+--------+ + * + * https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic + */ +template +bool testTernaryLogicTruthTable() +{ + constexpr size_t size = 9; + + Ternary::ResultType col_a_ternary[] = {Ternary::False, Ternary::False, Ternary::False, Ternary::Null, Ternary::Null, Ternary::Null, Ternary::True, Ternary::True, Ternary::True}; + Ternary::ResultType col_b_ternary[] = {Ternary::False, Ternary::Null, Ternary::True, Ternary::False, Ternary::Null, Ternary::True,Ternary::False, Ternary::Null, Ternary::True}; + Ternary::ResultType and_expected_ternary[] = {Ternary::False, Ternary::False, Ternary::False, Ternary::False, Ternary::Null, Ternary::Null,Ternary::False, Ternary::Null, Ternary::True}; + Ternary::ResultType or_expected_ternary[] = {Ternary::False, Ternary::Null, Ternary::True, Ternary::Null, Ternary::Null, Ternary::True,Ternary::True, Ternary::True, Ternary::True}; + Ternary::ResultType * expected_ternary; + + + if constexpr (std::is_same_v) + { + expected_ternary = and_expected_ternary; + } + else + { + expected_ternary = or_expected_ternary; + } + + auto col_a = 
createColumnNullable(col_a_ternary, size); + auto col_b = createColumnNullable(col_b_ternary, size); + ColumnRawPtrs arguments = {col_a.get(), col_b.get()}; + + auto col_res = ColumnUInt8::create(size); + auto & col_res_data = col_res->getData(); + + OperationApplier::apply(arguments, col_res->getData(), false); + + for (size_t i = 0; i < size; ++i) + { + if (col_res_data[i] != expected_ternary[i]) return false; + } + + return true; +} + +template +bool testTernaryLogicOfTwoColumns(size_t size) +{ + LinearCongruentialGenerator gen; + + TernaryValues left_column_ternary(size); + TernaryValues right_column_ternary(size); + TernaryValues expected_ternary(size); + + ColumnPtr left = createRandomColumn(gen, left_column_ternary); + ColumnPtr right = createRandomColumn(gen, right_column_ternary); + + for (size_t i = 0; i < size; ++i) + { + /// Given that False is less than Null and Null is less than True, the And operation can be implemented + /// with std::min, and the Or operation can be implemented with std::max. + if constexpr (std::is_same_v) + { + expected_ternary[i] = std::min(left_column_ternary[i], right_column_ternary[i]); + } + else + { + expected_ternary[i] = std::max(left_column_ternary[i], right_column_ternary[i]); + } + } + + ColumnRawPtrs arguments = {left.get(), right.get()}; + + auto col_res = ColumnUInt8::create(size); + auto & col_res_data = col_res->getData(); + + OperationApplier::apply(arguments, col_res->getData(), false); + + for (size_t i = 0; i < size; ++i) + { + if (col_res_data[i] != expected_ternary[i]) return false; + } + + return true; +} + +TEST(TernaryLogicTruthTable, NestedUInt8) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedUInt16) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedUInt32) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedUInt64) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedInt8) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedInt16) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedInt32) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedInt64) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedFloat32) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedFloat64) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + 
ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTwoColumns, TwoNullable) +{ + bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); + bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTwoColumns, TwoVector) +{ + bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); + bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTwoColumns, TwoNothing) +{ + bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); + bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTwoColumns, NullableVector) +{ + bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); + bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTwoColumns, NullableNothing) +{ + bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); + bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTwoColumns, VectorNothing) +{ + bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); + bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 6f7836c2fe7..2d274435a74 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -340,9 +340,7 @@ SeekableReadBufferPtr ReadBufferS3Factory::getReader() { const auto next_range = range_generator.nextRange(); if (!next_range) - { return nullptr; - } auto reader = std::make_shared( client_ptr, @@ -350,10 +348,11 @@ SeekableReadBufferPtr ReadBufferS3Factory::getReader() key, version_id, request_settings, - read_settings, + read_settings.adjustBufferSize(object_size), false /*use_external_buffer*/, next_range->first, next_range->second); + return reader; } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 127912a0b2a..8de1b85c8b9 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -35,6 +35,8 @@ namespace ErrorCodes extern const int CANNOT_PARSE_DATE; extern const int INCORRECT_DATA; extern const int ATTEMPT_TO_READ_AFTER_EOF; + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } template @@ -642,9 +644,10 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & const char delimiter = settings.delimiter; const char maybe_quote = *buf.position(); + const String & custom_delimiter = settings.custom_delimiter; /// Emptiness and not even in quotation marks. - if (maybe_quote == delimiter) + if (custom_delimiter.empty() && maybe_quote == delimiter) return; if ((settings.allow_single_quotes && maybe_quote == '\'') || (settings.allow_double_quotes && maybe_quote == '"')) @@ -682,6 +685,42 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & } else { + /// If custom_delimiter is specified, we should read until first occurrences of + /// custom_delimiter in buffer. 
+ if (!custom_delimiter.empty()) + { + PeekableReadBuffer * peekable_buf = dynamic_cast(&buf); + if (!peekable_buf) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Reading CSV string with custom delimiter is allowed only when using PeekableReadBuffer"); + + while (true) + { + if (peekable_buf->eof()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading CSV string, expected custom delimiter \"{}\"", custom_delimiter); + + char * next_pos = reinterpret_cast(memchr(peekable_buf->position(), custom_delimiter[0], peekable_buf->available())); + if (!next_pos) + next_pos = peekable_buf->buffer().end(); + + appendToStringOrVector(s, *peekable_buf, next_pos); + peekable_buf->position() = next_pos; + + if (!buf.hasPendingData()) + continue; + + { + PeekableReadBufferCheckpoint checkpoint{*peekable_buf, true}; + if (checkString(custom_delimiter, *peekable_buf)) + return; + } + + s.push_back(*peekable_buf->position()); + ++peekable_buf->position(); + } + + return; + } + /// Unquoted case. Look for delimiter or \r or \n. while (!buf.eof()) { @@ -776,6 +815,72 @@ void readCSVField(String & s, ReadBuffer & buf, const FormatSettings::CSV & sett s.push_back(quote); } +void readCSVWithTwoPossibleDelimitersImpl(String & s, PeekableReadBuffer & buf, const String & first_delimiter, const String & second_delimiter) +{ + /// Check that delimiters are not empty. + if (first_delimiter.empty() || second_delimiter.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read CSV field with two possible delimiters, one of delimiters '{}' and '{}' is empty", first_delimiter, second_delimiter); + + /// Read all data until first_delimiter or second_delimiter + while (true) + { + if (buf.eof()) + throw Exception(ErrorCodes::INCORRECT_DATA, R"(Unexpected EOF while reading CSV string, expected on of delimiters "{}" or "{}")", first_delimiter, second_delimiter); + + char * next_pos = buf.position(); + while (next_pos != buf.buffer().end() && *next_pos != first_delimiter[0] && *next_pos != second_delimiter[0]) + ++next_pos; + + appendToStringOrVector(s, buf, next_pos); + buf.position() = next_pos; + if (!buf.hasPendingData()) + continue; + + if (*buf.position() == first_delimiter[0]) + { + PeekableReadBufferCheckpoint checkpoint(buf, true); + if (checkString(first_delimiter, buf)) + return; + } + + if (*buf.position() == second_delimiter[0]) + { + PeekableReadBufferCheckpoint checkpoint(buf, true); + if (checkString(second_delimiter, buf)) + return; + } + + s.push_back(*buf.position()); + ++buf.position(); + } +} + +String readCSVStringWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const FormatSettings::CSV & settings, const String & first_delimiter, const String & second_delimiter) +{ + String res; + + /// If value is quoted, use regular CSV reading since we need to read only data inside quotes. + if (!buf.eof() && ((settings.allow_single_quotes && *buf.position() == '\'') || (settings.allow_double_quotes && *buf.position() == '"'))) + readCSVStringInto(res, buf, settings); + else + readCSVWithTwoPossibleDelimitersImpl(res, buf, first_delimiter, second_delimiter); + + return res; +} + +String readCSVFieldWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const FormatSettings::CSV & settings, const String & first_delimiter, const String & second_delimiter) +{ + String res; + + /// If value is quoted, use regular CSV reading since we need to read only data inside quotes. 
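readCSVWithTwoPossibleDelimitersImpl above scans for the first character of either delimiter and only then verifies the full delimiter via a checkpoint, so partial matches are copied into the value and scanning continues. A stripped-down, self-contained sketch of the same scanning idea over an in-memory string (no PeekableReadBuffer, no quoting; the real code additionally throws on unexpected EOF and requires non-empty delimiters; function name is illustrative):

    #include <cassert>
    #include <string>
    #include <string_view>

    /// Returns the prefix of `data` up to the first occurrence of either delimiter
    /// (or the whole string if neither occurs). Delimiters are assumed non-empty.
    std::string readUntilEitherDelimiter(std::string_view data, std::string_view first, std::string_view second)
    {
        std::string result;
        size_t pos = 0;
        while (pos < data.size())
        {
            /// Fast path: skip bytes that cannot start either delimiter.
            if (data[pos] != first[0] && data[pos] != second[0])
            {
                result.push_back(data[pos++]);
                continue;
            }
            /// Slow path: check whether a full delimiter actually starts here.
            if (data.compare(pos, first.size(), first) == 0 || data.compare(pos, second.size(), second) == 0)
                return result;
            result.push_back(data[pos++]); /// partial match, keep the byte and go on
        }
        return result;
    }

    int main()
    {
        assert(readUntilEitherDelimiter("abc||def", "||", "\n") == "abc");
        assert(readUntilEitherDelimiter("a|b||c", "||", "\n") == "a|b"); /// lone '|' is part of the value
        assert(readUntilEitherDelimiter("abc", "||", "\n") == "abc");
    }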
+ if (!buf.eof() && ((settings.allow_single_quotes && *buf.position() == '\'') || (settings.allow_double_quotes && *buf.position() == '"'))) + readCSVField(res, buf, settings); + else + readCSVWithTwoPossibleDelimitersImpl(res, buf, first_delimiter, second_delimiter); + + return res; +} + template void readCSVStringInto>(PaddedPODArray & s, ReadBuffer & buf, const FormatSettings::CSV & settings); template void readCSVStringInto(NullOutput & s, ReadBuffer & buf, const FormatSettings::CSV & settings); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index f7d5ca9b00c..4225c01bbd4 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -558,9 +558,10 @@ void readStringUntilWhitespace(String & s, ReadBuffer & buf); * - string could be placed in quotes; quotes could be single: ' if FormatSettings::CSV::allow_single_quotes is true * or double: " if FormatSettings::CSV::allow_double_quotes is true; * - or string could be unquoted - this is determined by first character; - * - if string is unquoted, then it is read until next delimiter, - * either until end of line (CR or LF), - * or until end of stream; + * - if string is unquoted, then: + * - If settings.custom_delimiter is not specified, it is read until next settings.delimiter, either until end of line (CR or LF) or until end of stream; + * - If settings.custom_delimiter is specified it reads until first occurrences of settings.custom_delimiter in buffer. + * This works only if provided buffer is PeekableReadBuffer. * but spaces and tabs at begin and end of unquoted string are consumed but ignored (note that this behaviour differs from RFC). * - if string is in quotes, then it will be read until closing quote, * but sequences of two consecutive quotes are parsed as single quote inside string; @@ -570,6 +571,13 @@ void readCSVString(String & s, ReadBuffer & buf, const FormatSettings::CSV & set /// Differ from readCSVString in that it doesn't remove quotes around field if any. void readCSVField(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings); +/// Read string in CSV format until the first occurrence of first_delimiter or second_delimiter. +/// Similar to readCSVString if string is in quotes, we read only data in quotes. +String readCSVStringWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const FormatSettings::CSV & settings, const String & first_delimiter, const String & second_delimiter); + +/// Same as above but includes quotes in the result if any. +String readCSVFieldWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const FormatSettings::CSV & settings, const String & first_delimiter, const String & second_delimiter); + /// Read and append result to array of characters. template void readStringInto(Vector & s, ReadBuffer & buf); diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index b7c3072e4ae..c03f7f07310 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -851,8 +851,12 @@ namespace S3 quoteString(bucket), !uri.empty() ? 
" (" + uri.toString() + ")" : ""); } + bool isNotFoundError(Aws::S3::S3Errors error) + { + return error == Aws::S3::S3Errors::RESOURCE_NOT_FOUND || error == Aws::S3::S3Errors::NO_SUCH_KEY; + } - S3::ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3) + Aws::S3::Model::HeadObjectOutcome headObject(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3) { ProfileEvents::increment(ProfileEvents::S3HeadObject); if (for_disk_s3) @@ -865,7 +869,12 @@ namespace S3 if (!version_id.empty()) req.SetVersionId(version_id); - Aws::S3::Model::HeadObjectOutcome outcome = client.HeadObject(req); + return client.HeadObject(req); + } + + S3::ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3) + { + auto outcome = headObject(client, bucket, key, version_id, for_disk_s3); if (outcome.IsSuccess()) { @@ -874,7 +883,10 @@ namespace S3 } else if (throw_on_error) { - throw DB::Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); + const auto & error = outcome.GetError(); + throw DB::Exception(ErrorCodes::S3_ERROR, + "Failed to HEAD object: {}. HTTP response code: {}", + error.GetMessage(), static_cast(error.GetResponseCode())); } return {}; } @@ -884,6 +896,21 @@ namespace S3 return getObjectInfo(client, bucket, key, version_id, throw_on_error, for_disk_s3).size; } + bool objectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3) + { + auto outcome = headObject(client, bucket, key, version_id, for_disk_s3); + + if (outcome.IsSuccess()) + return true; + + const auto & error = outcome.GetError(); + if (isNotFoundError(error.GetErrorType())) + return false; + + throw S3Exception(error.GetErrorType(), + "Failed to check existence of key {} in bucket {}: {}", + key, bucket, error.GetMessage()); + } } } diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 4eb720b3b85..73dc51b980f 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -13,20 +13,17 @@ #include #include #include +#include #include #include #include #include -namespace Aws::S3 -{ - class S3Client; -} - namespace DB { + namespace ErrorCodes { extern const int S3_ERROR; @@ -130,16 +127,22 @@ struct ObjectInfo time_t last_modification_time = 0; }; +bool isNotFoundError(Aws::S3::S3Errors error); + +Aws::S3::Model::HeadObjectOutcome headObject(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false); + S3::ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3); size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3); +bool objectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false); + } #endif namespace Poco::Util { -class AbstractConfiguration; + class AbstractConfiguration; }; namespace DB::S3 diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 1cd6a8ab64e..d5ec2eeee39 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -179,17 +179,7 @@ 
void WriteBufferFromS3::finalizeImpl() { LOG_TRACE(log, "Checking object {} exists after upload", key); - - Aws::S3::Model::HeadObjectRequest request; - request.SetBucket(bucket); - request.SetKey(key); - - ProfileEvents::increment(ProfileEvents::S3HeadObject); - if (write_settings.for_object_storage) - ProfileEvents::increment(ProfileEvents::DiskS3HeadObject); - - auto response = client_ptr->HeadObject(request); - + auto response = S3::headObject(*client_ptr, bucket, key, "", write_settings.for_object_storage); if (!response.IsSuccess()) throw S3Exception(fmt::format("Object {} from bucket {} disappeared immediately after upload, it's a bug in S3 or S3 API.", key, bucket), response.GetError().GetErrorType()); else diff --git a/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp b/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp index d623d510ffd..3386dfb8792 100644 --- a/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp @@ -40,13 +40,18 @@ BlockIO InterpreterCreateRoleQuery::execute() else getContext()->checkAccess(AccessType::CREATE_ROLE); - if (!query.cluster.empty()) - return executeDDLQueryOnCluster(query_ptr, getContext()); - std::optional settings_from_query; if (query.settings) + { settings_from_query = SettingsProfileElements{*query.settings, access_control}; + if (!query.attach) + getContext()->checkSettingsConstraints(*settings_from_query); + } + + if (!query.cluster.empty()) + return executeDDLQueryOnCluster(query_ptr, getContext()); + if (query.alter) { auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr diff --git a/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp b/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp index 95c2a58388a..0727b6f2182 100644 --- a/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp @@ -48,16 +48,21 @@ BlockIO InterpreterCreateSettingsProfileQuery::execute() else getContext()->checkAccess(AccessType::CREATE_SETTINGS_PROFILE); + std::optional settings_from_query; + if (query.settings) + { + settings_from_query = SettingsProfileElements{*query.settings, access_control}; + + if (!query.attach) + getContext()->checkSettingsConstraints(*settings_from_query); + } + if (!query.cluster.empty()) { query.replaceCurrentUserTag(getContext()->getUserName()); return executeDDLQueryOnCluster(query_ptr, getContext()); } - std::optional settings_from_query; - if (query.settings) - settings_from_query = SettingsProfileElements{*query.settings, access_control}; - std::optional roles_from_query; if (query.to_roles) roles_from_query = RolesOrUsersSet{*query.to_roles, access_control, getContext()->getUserID()}; diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 169498e43eb..1aa5cd03530 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -124,12 +124,19 @@ BlockIO InterpreterCreateUserQuery::execute() access->checkAdminOption(role); } } - if (!query.cluster.empty()) - return executeDDLQueryOnCluster(query_ptr, getContext()); + std::optional settings_from_query; if (query.settings) + { settings_from_query = SettingsProfileElements{*query.settings, access_control}; + if (!query.attach) + getContext()->checkSettingsConstraints(*settings_from_query); + } + + if (!query.cluster.empty()) + return 
executeDDLQueryOnCluster(query_ptr, getContext()); + if (query.alter) { std::optional grantees_from_query; diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 72fa1b3c324..db95b161a4f 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1107,8 +1107,10 @@ void FileCache::reduceSizeToDownloaded( file_segment->getInfoForLogUnlocked(segment_lock)); } + CreateFileSegmentSettings create_settings{ .is_persistent = file_segment->is_persistent }; + cell->file_segment = std::make_shared( - offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED, CreateFileSegmentSettings{}); + offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED, create_settings); assert(file_segment->reserved_size == downloaded_size); } diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 418bcee05d9..177c6aecf7c 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -56,6 +56,7 @@ FileSegment::FileSegment( { reserved_size = downloaded_size = size_; is_downloaded = true; + chassert(std::filesystem::file_size(getPathInLocalCache()) == size_); break; } case (State::SKIP_CACHE): @@ -331,6 +332,8 @@ void FileSegment::write(const char * from, size_t size, size_t offset) cache_writer->next(); downloaded_size += size; + + chassert(std::filesystem::file_size(getPathInLocalCache()) == downloaded_size); } catch (Exception & e) { @@ -345,9 +348,7 @@ void FileSegment::write(const char * from, size_t size, size_t offset) throw; } -#ifndef NDEBUG chassert(getFirstNonDownloadedOffset() == offset + size); -#endif } FileSegment::State FileSegment::wait() @@ -545,6 +546,13 @@ void FileSegment::completeBasedOnCurrentState(std::lock_guard & cach resetDownloaderUnlocked(segment_lock); } + if (cache_writer && (is_downloader || is_last_holder)) + { + cache_writer->finalize(); + cache_writer.reset(); + remote_file_reader.reset(); + } + switch (download_state) { case State::SKIP_CACHE: @@ -557,8 +565,9 @@ void FileSegment::completeBasedOnCurrentState(std::lock_guard & cach case State::DOWNLOADED: { chassert(getDownloadedSizeUnlocked(segment_lock) == range().size()); - assert(is_downloaded); - assert(!cache_writer); + chassert(getDownloadedSizeUnlocked(segment_lock) == std::filesystem::file_size(getPathInLocalCache())); + chassert(is_downloaded); + chassert(!cache_writer); break; } case State::DOWNLOADING: diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index bfd29de970d..b8b322cdde1 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1254,6 +1254,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const if (select_query_hint && getSettingsRef().use_structure_from_insertion_table_in_table_functions == 2) { const auto * expression_list = select_query_hint->select()->as(); + std::unordered_set virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint(); Names columns_names; bool have_asterisk = false; /// First, check if we have only identifiers, asterisk and literals in select expression, @@ -1275,10 +1276,10 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const } } - /// Check that all identifiers are column names from insertion table. + /// Check that all identifiers are column names from insertion table and not virtual column names from storage. 
for (const auto & column_name : columns_names) { - if (!structure_hint.has(column_name)) + if (!structure_hint.has(column_name) || virtual_column_names.contains(column_name)) { use_columns_from_insert_query = false; break; @@ -1411,6 +1412,11 @@ void Context::applySettingsChanges(const SettingsChanges & changes) } +void Context::checkSettingsConstraints(const SettingsProfileElements & profile_elements) const +{ + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, profile_elements); +} + void Context::checkSettingsConstraints(const SettingChange & change) const { getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, change); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 807467959a0..a8d9b43c030 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -110,6 +110,7 @@ class AccessControl; class Credentials; class GSSAcceptorContext; struct SettingsConstraintsAndProfileIDs; +class SettingsProfileElements; class RemoteHostFilter; struct StorageID; class IDisk; @@ -658,6 +659,7 @@ public: void applySettingsChanges(const SettingsChanges & changes); /// Checks the constraints. + void checkSettingsConstraints(const SettingsProfileElements & profile_elements) const; void checkSettingsConstraints(const SettingChange & change) const; void checkSettingsConstraints(const SettingsChanges & changes) const; void checkSettingsConstraints(SettingsChanges & changes) const; diff --git a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp index cda91cd4ba1..866d38f3aa5 100644 --- a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp index c7397d3d64c..ccbbac71279 100644 --- a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp index cb237287dc3..2205d6cff88 100644 --- a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index ad3ec9b3189..fb77f0997d8 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -112,6 +112,11 @@ QueryPlan && InterpreterSelectQueryAnalyzer::extractQueryPlan() && return std::move(planner).extractQueryPlan(); } +void InterpreterSelectQueryAnalyzer::addStorageLimits(const StorageLimitsList & storage_limits) +{ + planner.addStorageLimits(storage_limits); +} + void InterpreterSelectQueryAnalyzer::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr) const { elem.query_kind = "Select"; diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.h b/src/Interpreters/InterpreterSelectQueryAnalyzer.h index 1dbe5ffccc6..04dfe4e0948 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.h +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.h 
@@ -41,6 +41,8 @@ public: QueryPlan && extractQueryPlan() &&; + void addStorageLimits(const StorageLimitsList & storage_limits); + bool supportsTransactions() const override { return true; } bool ignoreLimits() const override { return select_query_options.ignore_limits; } diff --git a/src/Interpreters/TraceCollector.cpp b/src/Interpreters/TraceCollector.cpp index 050dea02717..367249f1289 100644 --- a/src/Interpreters/TraceCollector.cpp +++ b/src/Interpreters/TraceCollector.cpp @@ -97,6 +97,9 @@ void TraceCollector::run() Int64 size; readPODBinary(size, in); + UInt64 ptr; + readPODBinary(ptr, in); + ProfileEvents::Event event; readPODBinary(event, in); @@ -112,7 +115,7 @@ void TraceCollector::run() UInt64 time = static_cast(ts.tv_sec * 1000000000LL + ts.tv_nsec); UInt64 time_in_microseconds = static_cast((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000)); - TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, event, increment}; + TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, ptr, event, increment}; trace_log->add(element); } } diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index 0408ebe504b..cd5f965a679 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -38,6 +38,7 @@ NamesAndTypesList TraceLogElement::getNamesAndTypes() {"query_id", std::make_shared()}, {"trace", std::make_shared(std::make_shared())}, {"size", std::make_shared()}, + {"ptr", std::make_shared()}, {"event", std::make_shared(std::make_shared())}, {"increment", std::make_shared()}, }; @@ -57,6 +58,7 @@ void TraceLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(query_id.data(), query_id.size()); columns[i++]->insert(trace); columns[i++]->insert(size); + columns[i++]->insert(ptr); String event_name; if (event != ProfileEvents::end()) diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index c481f033a72..71aec0b50c4 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -27,8 +27,10 @@ struct TraceLogElement UInt64 thread_id{}; String query_id{}; Array trace{}; - /// Allocation size in bytes for TraceType::Memory. + /// Allocation size in bytes for TraceType::Memory and TraceType::MemorySample. Int64 size{}; + /// Allocation ptr for TraceType::MemorySample. + UInt64 ptr{}; /// ProfileEvent for TraceType::ProfileEvent. ProfileEvents::Event event{ProfileEvents::end()}; /// Increment of profile event for TraceType::ProfileEvent. 
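The new ptr column above makes it possible to pair sampled allocations with the corresponding deallocations in system.trace_log. As a rough, illustrative sketch only (not part of this diff, and assuming MemorySample rows record frees with a negative size, which this hunk does not show), a query of the following shape could list sampled allocations that were never observed being freed:

SELECT ptr, sum(size) AS outstanding_bytes
FROM system.trace_log
WHERE trace_type = 'MemorySample' AND event_date = today()
GROUP BY ptr
HAVING outstanding_bytes > 0
ORDER BY outstanding_bytes DESC
LIMIT 10

For other trace types the column presumably stays at zero, so grouping by ptr is only meaningful for MemorySample events.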
diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index 22150b9f656..5bad3e9bba2 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -80,7 +80,7 @@ void complete(const DB::FileSegmentsHolder & holder) { ASSERT_TRUE(file_segment->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(file_segment); - file_segment->completeWithState(DB::FileSegment::State::DOWNLOADED); + file_segment->completeWithoutState(); } } @@ -127,7 +127,7 @@ TEST(FileCache, get) assertRange(2, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADING); download(segments[0]); - segments[0]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[0]->completeWithoutState(); assertRange(3, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED); } @@ -148,7 +148,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments[1]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments[1]); - segments[1]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[1]->completeWithoutState(); assertRange(6, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED); } @@ -205,7 +205,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments[2]); - segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[2]->completeWithoutState(); assertRange(14, segments[3], DB::FileSegment::Range(17, 20), DB::FileSegment::State::DOWNLOADED); @@ -246,7 +246,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments[3]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments[3]); - segments[3]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[3]->completeWithoutState(); ASSERT_TRUE(segments[3]->state() == DB::FileSegment::State::DOWNLOADED); } @@ -269,8 +269,8 @@ TEST(FileCache, get) ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments[0]); prepareAndDownload(segments[2]); - segments[0]->completeWithState(DB::FileSegment::State::DOWNLOADED); - segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[0]->completeWithoutState(); + segments[2]->completeWithoutState(); } /// Current cache: [____][_] [][___][__] @@ -292,8 +292,8 @@ TEST(FileCache, get) ASSERT_TRUE(s1[0]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(s5[0]); prepareAndDownload(s1[0]); - s5[0]->completeWithState(DB::FileSegment::State::DOWNLOADED); - s1[0]->completeWithState(DB::FileSegment::State::DOWNLOADED); + s5[0]->completeWithoutState(); + s1[0]->completeWithoutState(); /// Current cache: [___] [_][___][_] [__] /// ^ ^ ^ ^ ^ ^ ^ ^ @@ -395,7 +395,7 @@ TEST(FileCache, get) } prepareAndDownload(segments[2]); - segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[2]->completeWithoutState(); ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADED); other_1.join(); @@ -460,7 +460,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments_2[1]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments_2[1]); - segments_2[1]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments_2[1]->completeWithoutState(); }); { diff --git a/src/Interpreters/threadPoolCallbackRunner.cpp b/src/Interpreters/threadPoolCallbackRunner.cpp deleted file mode 
100644 index 9bf32e4f2e1..00000000000 --- a/src/Interpreters/threadPoolCallbackRunner.cpp +++ /dev/null @@ -1,44 +0,0 @@ -#include "threadPoolCallbackRunner.h" - -#include -#include -#include -#include -#include - - -namespace DB -{ - -template ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name) -{ - return [pool = &pool, thread_group = CurrentThread::getGroup(), thread_name](std::function && callback, size_t priority) mutable -> std::future - { - auto task = std::make_shared>([thread_group, thread_name, callback = std::move(callback)]() -> Result - { - if (thread_group) - CurrentThread::attachTo(thread_group); - - SCOPE_EXIT_SAFE({ - if (thread_group) - CurrentThread::detachQueryIfNotDetached(); - }); - - setThreadName(thread_name.data()); - - return callback(); - }); - - auto future = task->get_future(); - - /// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority". - pool->scheduleOrThrow([task]{ (*task)(); }, -priority); - - return future; - }; -} - -template ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name); -template ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name); - -} diff --git a/src/Interpreters/threadPoolCallbackRunner.h b/src/Interpreters/threadPoolCallbackRunner.h index c146ac67482..9cadcc0ebb0 100644 --- a/src/Interpreters/threadPoolCallbackRunner.h +++ b/src/Interpreters/threadPoolCallbackRunner.h @@ -1,9 +1,11 @@ #pragma once #include +#include +#include +#include #include - namespace DB { @@ -13,6 +15,32 @@ using ThreadPoolCallbackRunner = std::function(std::function /// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()'. template -ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name); +ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name) +{ + return [pool = &pool, thread_group = CurrentThread::getGroup(), thread_name](std::function && callback, size_t priority) mutable -> std::future + { + auto task = std::make_shared>([thread_group, thread_name, callback = std::move(callback)]() -> Result + { + if (thread_group) + CurrentThread::attachTo(thread_group); + + SCOPE_EXIT_SAFE({ + if (thread_group) + CurrentThread::detachQueryIfNotDetached(); + }); + + setThreadName(thread_name.data()); + + return callback(); + }); + + auto future = task->get_future(); + + /// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority". 
+ pool->scheduleOrThrow([task]{ (*task)(); }, -priority); + + return future; + }; +} } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 0cb4f77035b..d88766f3656 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -365,9 +365,9 @@ void Planner::buildQueryPlanIfNeeded() select_query_info.query = select_query_info.original_query; select_query_info.planner_context = planner_context; - StorageLimitsList storage_limits; - storage_limits.push_back(buildStorageLimits(*query_context, select_query_options)); - select_query_info.storage_limits = std::make_shared(storage_limits); + auto current_storage_limits = storage_limits; + current_storage_limits.push_back(buildStorageLimits(*query_context, select_query_options)); + select_query_info.storage_limits = std::make_shared(std::move(current_storage_limits)); collectTableExpressionData(query_tree, *planner_context); checkStoragesSupportTransactions(planner_context); @@ -847,4 +847,10 @@ void Planner::buildQueryPlanIfNeeded() extendQueryContextAndStoragesLifetime(query_plan, planner_context); } +void Planner::addStorageLimits(const StorageLimitsList & limits) +{ + for (const auto & limit : limits) + storage_limits.push_back(limit); +} + } diff --git a/src/Planner/Planner.h b/src/Planner/Planner.h index de4ed5b92e9..1de3e0efded 100644 --- a/src/Planner/Planner.h +++ b/src/Planner/Planner.h @@ -45,11 +45,14 @@ public: return std::move(query_plan); } + void addStorageLimits(const StorageLimitsList & limits); + private: QueryTreeNodePtr query_tree; QueryPlan query_plan; SelectQueryOptions select_query_options; PlannerContextPtr planner_context; + StorageLimitsList storage_limits; }; } diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index 16df132b9d8..7583bf72457 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -12,16 +12,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -static FormatSettings updateFormatSettings(const FormatSettings & settings) -{ - if (settings.custom.escaping_rule != FormatSettings::EscapingRule::CSV || settings.custom.field_delimiter.empty()) - return settings; - - auto updated = settings; - updated.csv.delimiter = settings.custom.field_delimiter.front(); - return updated; -} - CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat( const Block & header_, ReadBuffer & in_buf_, @@ -31,7 +21,7 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat( bool ignore_spaces_, const FormatSettings & format_settings_) : CustomSeparatedRowInputFormat( - header_, std::make_unique(in_buf_), params_, with_names_, with_types_, ignore_spaces_, updateFormatSettings(format_settings_)) + header_, std::make_unique(in_buf_), params_, with_names_, with_types_, ignore_spaces_, format_settings_) { } @@ -171,15 +161,31 @@ bool CustomSeparatedFormatReader::checkEndOfRow() } template -String CustomSeparatedFormatReader::readFieldIntoString(bool is_first) +String CustomSeparatedFormatReader::readFieldIntoString(bool is_first, bool is_last, bool is_unknown) { if (!is_first) skipFieldDelimiter(); skipSpaces(); + updateFormatSettings(is_last); if constexpr (is_header) + { + /// If the number of columns is unknown and we use CSV escaping rule, + /// we don't know what delimiter to expect after the value, + /// so we should read until we meet field_delimiter or row_after_delimiter. 
+ if (is_unknown && format_settings.custom.escaping_rule == FormatSettings::EscapingRule::CSV) + return readCSVStringWithTwoPossibleDelimiters( + *buf, format_settings.csv, format_settings.custom.field_delimiter, format_settings.custom.row_after_delimiter); + return readStringByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); + } else + { + if (is_unknown && format_settings.custom.escaping_rule == FormatSettings::EscapingRule::CSV) + return readCSVFieldWithTwoPossibleDelimiters( + *buf, format_settings.csv, format_settings.custom.field_delimiter, format_settings.custom.row_after_delimiter); + return readFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); + } } template @@ -192,14 +198,14 @@ std::vector CustomSeparatedFormatReader::readRowImpl() { do { - values.push_back(readFieldIntoString(values.empty())); + values.push_back(readFieldIntoString(values.empty(), false, true)); } while (!checkEndOfRow()); columns = values.size(); } else { for (size_t i = 0; i != columns; ++i) - values.push_back(readFieldIntoString(i == 0)); + values.push_back(readFieldIntoString(i == 0, i + 1 == columns, false)); } skipRowEndDelimiter(); @@ -223,9 +229,41 @@ void CustomSeparatedFormatReader::skipHeaderRow() skipRowEndDelimiter(); } -bool CustomSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool, const String &) +void CustomSeparatedFormatReader::updateFormatSettings(bool is_last_column) +{ + if (format_settings.custom.escaping_rule != FormatSettings::EscapingRule::CSV) + return; + + /// Clean custom delimiter from previous delimiter. + format_settings.csv.custom_delimiter.clear(); + + /// If delimiter has length = 1, it will be more efficient to use csv.delimiter. + /// If we have some complex delimiter, normal CSV reading will now work properly if we will + /// use just the first character of delimiter (for example, if delimiter='||' and we have data 'abc|d||') + /// We have special implementation for such case that uses custom delimiter, it's not so efficient, + /// but works properly. + + if (is_last_column) + { + /// If field delimiter has length = 1, it will be more efficient to use csv.delimiter. + if (format_settings.custom.row_after_delimiter.size() == 1) + format_settings.csv.delimiter = format_settings.custom.row_after_delimiter.front(); + else + format_settings.csv.custom_delimiter = format_settings.custom.row_after_delimiter; + } + else + { + if (format_settings.custom.field_delimiter.size() == 1) + format_settings.csv.delimiter = format_settings.custom.field_delimiter.front(); + else + format_settings.csv.custom_delimiter = format_settings.custom.field_delimiter; + } +} + +bool CustomSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String &) { skipSpaces(); + updateFormatSettings(is_last_file_column); return deserializeFieldByEscapingRule(type, serialization, column, *buf, format_settings.custom.escaping_rule, format_settings); } @@ -237,6 +275,8 @@ bool CustomSeparatedFormatReader::checkForSuffixImpl(bool check_eof) if (!check_eof) return false; + /// Allow optional \n before eof. + checkChar('\n', *buf); return buf->eof(); } @@ -246,6 +286,8 @@ bool CustomSeparatedFormatReader::checkForSuffixImpl(bool check_eof) if (!check_eof) return true; + /// Allow optional \n before eof. 
+ checkChar('\n', *buf); if (buf->eof()) return true; } @@ -312,7 +354,7 @@ CustomSeparatedSchemaReader::CustomSeparatedSchemaReader( &reader, getDefaultDataTypeForEscapingRule(format_setting_.custom.escaping_rule)) , buf(in_) - , reader(buf, ignore_spaces_, updateFormatSettings(format_setting_)) + , reader(buf, ignore_spaces_, format_setting_) { } diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index e7e96ab87b1..625278631a5 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -83,7 +83,9 @@ private: std::vector readRowImpl(); template - String readFieldIntoString(bool is_first); + String readFieldIntoString(bool is_first, bool is_last, bool is_unknown); + + void updateFormatSettings(bool is_last_column); PeekableReadBuffer * buf; bool ignore_spaces; diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index 76fd0d2a907..1532b16525f 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -25,6 +25,27 @@ namespace ErrorCodes ErrorCodes::CANNOT_READ_ALL_DATA); } +static void updateFormatSettingsIfNeeded(FormatSettings::EscapingRule escaping_rule, FormatSettings & settings, const ParsedTemplateFormatString & row_format, char default_csv_delimiter, size_t file_column) +{ + if (escaping_rule != FormatSettings::EscapingRule::CSV) + return; + + /// Clean custom_delimiter from previous column. + settings.csv.custom_delimiter.clear(); + /// If field delimiter is empty, we read until default csv delimiter. + if (row_format.delimiters[file_column + 1].empty()) + settings.csv.delimiter = default_csv_delimiter; + /// If field delimiter has length = 1, it will be more efficient to use csv.delimiter. + else if (row_format.delimiters[file_column + 1].size() == 1) + settings.csv.delimiter = row_format.delimiters[file_column + 1].front(); + /// If we have some complex delimiter, normal CSV reading will now work properly if we will + /// use the first character of delimiter (for example, if delimiter='||' and we have data 'abc|d||') + /// We have special implementation for such case that uses custom delimiter, it's not so efficient, + /// but works properly. + else + settings.csv.custom_delimiter = row_format.delimiters[file_column + 1]; +} + TemplateRowInputFormat::TemplateRowInputFormat( const Block & header_, ReadBuffer & in_, @@ -129,10 +150,8 @@ bool TemplateRowInputFormat::deserializeField(const DataTypePtr & type, const SerializationPtr & serialization, IColumn & column, size_t file_column) { EscapingRule escaping_rule = row_format.escaping_rules[file_column]; - if (escaping_rule == EscapingRule::CSV) - /// Will read unquoted string until settings.csv.delimiter - settings.csv.delimiter = row_format.delimiters[file_column + 1].empty() ? 
default_csv_delimiter : - row_format.delimiters[file_column + 1].front(); + updateFormatSettingsIfNeeded(escaping_rule, settings, row_format, default_csv_delimiter, file_column); + try { return deserializeFieldByEscapingRule(type, serialization, column, *buf, escaping_rule, settings); @@ -466,6 +485,7 @@ TemplateSchemaReader::TemplateSchemaReader( , format(format_) , row_format(row_format_) , format_reader(buf, ignore_spaces_, format, row_format, row_between_delimiter, format_settings) + , default_csv_delimiter(format_settings_.csv.delimiter) { setColumnNames(row_format.column_names); } @@ -489,9 +509,7 @@ DataTypes TemplateSchemaReader::readRowAndGetDataTypes() for (size_t i = 0; i != row_format.columnsCount(); ++i) { format_reader.skipDelimiter(i); - if (row_format.escaping_rules[i] == FormatSettings::EscapingRule::CSV) - format_settings.csv.delimiter = row_format.delimiters[i + 1].empty() ? format_settings.csv.delimiter : row_format.delimiters[i + 1].front(); - + updateFormatSettingsIfNeeded(row_format.escaping_rules[i], format_settings, row_format, default_csv_delimiter, i); field = readFieldByEscapingRule(buf, row_format.escaping_rules[i], format_settings); data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i])); } diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h index 740683ad95d..cf12eb8d136 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -128,6 +128,7 @@ private: const ParsedTemplateFormatString row_format; TemplateFormatReader format_reader; bool first_row = true; + const char default_csv_delimiter; }; bool parseDelimiterWithDiagnosticInfo(WriteBuffer & out, ReadBuffer & buf, const String & delimiter, const String & description, bool skip_spaces); diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index e7dda957b04..b3066f0bdbb 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -111,7 +111,7 @@ public: protected: ReadBuffer * in; - const FormatSettings format_settings; + FormatSettings format_settings; }; /// Base class for schema inference for formats with -WithNames and -WithNamesAndTypes suffixes. diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 12e1a2989db..39814e9dfd5 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -737,14 +737,20 @@ void TCPHandler::processOrdinaryQueryWithProcessors() auto & pipeline = state.io.pipeline; if (query_context->getSettingsRef().allow_experimental_query_deduplication) + { + std::lock_guard lock(task_callback_mutex); sendPartUUIDs(); + } /// Send header-block, to allow client to prepare output format for data to send. 
{ const auto & header = pipeline.getHeader(); if (header) + { + std::lock_guard lock(task_callback_mutex); sendData(header); + } } { diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index e6c6f02b098..963e874b2a3 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -46,6 +46,17 @@ class MarkCache; class UncompressedCache; class MergeTreeTransaction; + +enum class DataPartRemovalState +{ + NOT_ATTEMPTED, + VISIBLE_TO_TRANSACTIONS, + NON_UNIQUE_OWNERSHIP, + NOT_REACHED_REMOVAL_TIME, + HAS_SKIPPED_MUTATION_PARENT, + REMOVED, +}; + /// Description of the data part. class IMergeTreeDataPart : public std::enable_shared_from_this, public DataPartStorageHolder { @@ -446,6 +457,10 @@ public: void removeDeleteOnDestroyMarker(); void removeVersionMetadata(); + mutable std::atomic removal_state = DataPartRemovalState::NOT_ATTEMPTED; + + mutable std::atomic last_removal_attemp_time = 0; + protected: /// Total size of all columns, calculated once in calcuateColumnSizesOnDisk diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index 76d69cc6b7d..a833da7064f 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -88,6 +88,10 @@ MergeListElement::MergeListElement( /// thread_group::memory_tracker, but MemoryTrackerThreadSwitcher will reset parent). memory_tracker.setProfilerStep(settings.memory_profiler_step); memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + /// Specify sample probability also for current thread to track more deallocations. + if (auto * thread_memory_tracker = DB::CurrentThread::getMemoryTracker()) + thread_memory_tracker->setSampleProbability(settings.memory_profiler_sample_probability); + memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); if (settings.memory_tracker_fault_probability > 0.0) memory_tracker.setFaultProbability(settings.memory_tracker_fault_probability); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 3841295b875..e60781efa9c 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -84,6 +84,7 @@ #include #include +#include #include #include #include @@ -1762,9 +1763,12 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) { const DataPartPtr & part = *it; + part->last_removal_attemp_time.store(time_now, std::memory_order_relaxed); + /// Do not remove outdated part if it may be visible for some transaction if (!part->version.canBeRemoved()) { + part->removal_state.store(DataPartRemovalState::VISIBLE_TO_TRANSACTIONS, std::memory_order_relaxed); skipped_parts.push_back(part->info); continue; } @@ -1772,20 +1776,27 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) /// Grab only parts that are not used by anyone (SELECTs for example). 
if (!part.unique()) { + part->removal_state.store(DataPartRemovalState::NON_UNIQUE_OWNERSHIP, std::memory_order_relaxed); skipped_parts.push_back(part->info); continue; } auto part_remove_time = part->remove_time.load(std::memory_order_relaxed); - if ((part_remove_time < time_now && time_now - part_remove_time > getSettings()->old_parts_lifetime.totalSeconds() && !has_skipped_mutation_parent(part)) + bool reached_removal_time = part_remove_time < time_now && time_now - part_remove_time > getSettings()->old_parts_lifetime.totalSeconds(); + if ((reached_removal_time && !has_skipped_mutation_parent(part)) || force || isInMemoryPart(part) /// Remove in-memory parts immediately to not store excessive data in RAM || (part->version.creation_csn == Tx::RolledBackCSN && getSettings()->remove_rolled_back_parts_immediately)) { + part->removal_state.store(DataPartRemovalState::REMOVED, std::memory_order_relaxed); parts_to_delete.emplace_back(it); } else { + if (!reached_removal_time) + part->removal_state.store(DataPartRemovalState::NOT_REACHED_REMOVAL_TIME, std::memory_order_relaxed); + else + part->removal_state.store(DataPartRemovalState::HAS_SKIPPED_MUTATION_PARENT, std::memory_order_relaxed); skipped_parts.push_back(part->info); continue; } diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index d7ea68e7d64..b1d726335ae 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -172,7 +172,7 @@ ColumnWithTypeAndName RPNBuilderTreeNode::getConstantColumn() const if (ast_node) { - const auto * literal = assert_cast(ast_node); + const auto * literal = typeid_cast(ast_node); if (literal) { result.type = applyVisitor(FieldToDataType(), literal->value); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 10ec4702b53..93724e4946d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -386,8 +386,13 @@ void ReplicatedMergeTreeRestartingThread::setReadonly(bool on_shutdown) CurrentMetrics::add(CurrentMetrics::ReadonlyReplica); /// Replica was already readonly, but we should decrement the metric, because we are detaching/dropping table. - if (on_shutdown) + /// if first pass wasn't done we don't have to decrement because it wasn't incremented in the first place + /// the task should be deactivated if it's full shutdown so no race is present + if (!first_time && on_shutdown) + { CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica); + assert(CurrentMetrics::get(CurrentMetrics::ReadonlyReplica) >= 0); + } } void ReplicatedMergeTreeRestartingThread::setNotReadonly() @@ -397,7 +402,10 @@ void ReplicatedMergeTreeRestartingThread::setNotReadonly() /// because we don't want to change this metric if replication is started successfully. /// So we should not decrement it when replica stopped being readonly on startup. 
if (storage.is_readonly.compare_exchange_strong(old_val, false) && !first_time) + { CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica); + assert(CurrentMetrics::get(CurrentMetrics::ReadonlyReplica) >= 0); + } } } diff --git a/src/Storages/NamedCollectionConfiguration.cpp b/src/Storages/NamedCollections/NamedCollectionConfiguration.cpp similarity index 88% rename from src/Storages/NamedCollectionConfiguration.cpp rename to src/Storages/NamedCollections/NamedCollectionConfiguration.cpp index b0e7bdce32a..6875458958b 100644 --- a/src/Storages/NamedCollectionConfiguration.cpp +++ b/src/Storages/NamedCollections/NamedCollectionConfiguration.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -35,19 +35,30 @@ template T getConfigValueOrDefault( return *default_value; } - if constexpr (std::is_same_v) - return config.getString(path); - else if constexpr (std::is_same_v) - return config.getUInt64(path); - else if constexpr (std::is_same_v) - return config.getInt64(path); - else if constexpr (std::is_same_v) - return config.getDouble(path); - else + try + { + if constexpr (std::is_same_v) + return config.getString(path); + else if constexpr (std::is_same_v) + return config.getUInt64(path); + else if constexpr (std::is_same_v) + return config.getInt64(path); + else if constexpr (std::is_same_v) + return config.getDouble(path); + else + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Unsupported type in getConfigValueOrDefault(). " + "Supported types are String, UInt64, Int64, Float64"); + } + catch (const Poco::SyntaxException &) + { throw Exception( - ErrorCodes::NOT_IMPLEMENTED, - "Unsupported type in getConfigValueOrDefault(). " - "Supported types are String, UInt64, Int64, Float64"); + ErrorCodes::BAD_ARGUMENTS, + "Cannot extract {} from {}", + toString(magic_enum::enum_name(Field::TypeToEnum>::value)), + path); + } } template void setConfigValue( diff --git a/src/Storages/NamedCollectionConfiguration.h b/src/Storages/NamedCollections/NamedCollectionConfiguration.h similarity index 100% rename from src/Storages/NamedCollectionConfiguration.h rename to src/Storages/NamedCollections/NamedCollectionConfiguration.h diff --git a/src/Storages/NamedCollectionUtils.cpp b/src/Storages/NamedCollections/NamedCollectionUtils.cpp similarity index 97% rename from src/Storages/NamedCollectionUtils.cpp rename to src/Storages/NamedCollections/NamedCollectionUtils.cpp index 75d5aace664..c4caa5c95f6 100644 --- a/src/Storages/NamedCollectionUtils.cpp +++ b/src/Storages/NamedCollections/NamedCollectionUtils.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -13,8 +13,8 @@ #include #include #include -#include -#include +#include +#include namespace fs = std::filesystem; @@ -78,7 +78,7 @@ public: /// (`enumerate_result` == ). const bool collection_is_empty = enumerate_result.size() == 1 && *enumerate_result.begin() == collection_prefix; - std::set keys; + std::set> keys; if (!collection_is_empty) { /// Skip collection prefix and add +1 to avoid '.' in the beginning. 
@@ -296,7 +296,7 @@ private: const auto config = NamedCollectionConfiguration::createConfiguration( collection_name, query.changes); - std::set keys; + std::set> keys; for (const auto & [name, _] : query.changes) keys.insert(name); diff --git a/src/Storages/NamedCollectionUtils.h b/src/Storages/NamedCollections/NamedCollectionUtils.h similarity index 100% rename from src/Storages/NamedCollectionUtils.h rename to src/Storages/NamedCollections/NamedCollectionUtils.h diff --git a/src/Storages/NamedCollections.cpp b/src/Storages/NamedCollections/NamedCollections.cpp similarity index 92% rename from src/Storages/NamedCollections.cpp rename to src/Storages/NamedCollections/NamedCollections.cpp index d90225547ac..03633bbd370 100644 --- a/src/Storages/NamedCollections.cpp +++ b/src/Storages/NamedCollections/NamedCollections.cpp @@ -3,8 +3,8 @@ #include #include #include -#include -#include +#include +#include #include #include @@ -234,6 +234,16 @@ public: return keys; } + Keys::const_iterator begin() const + { + return keys.begin(); + } + + Keys::const_iterator end() const + { + return keys.end(); + } + std::string dumpStructure() const { /// Convert a collection config like @@ -375,6 +385,22 @@ NamedCollection::Keys NamedCollection::getKeys() const return pimpl->getKeys(); } +template NamedCollection::const_iterator NamedCollection::begin() const +{ + std::unique_lock lock(mutex, std::defer_lock); + if constexpr (!Locked) + lock.lock(); + return pimpl->begin(); +} + +template NamedCollection::const_iterator NamedCollection::end() const +{ + std::unique_lock lock(mutex, std::defer_lock); + if constexpr (!Locked) + lock.lock(); + return pimpl->end(); +} + std::string NamedCollection::dumpStructure() const { std::lock_guard lock(mutex); @@ -417,4 +443,8 @@ template void NamedCollection::setOrUpdate(const NamedCollection template void NamedCollection::remove(const Key & key); template void NamedCollection::remove(const Key & key); +template NamedCollection::const_iterator NamedCollection::begin() const; +template NamedCollection::const_iterator NamedCollection::begin() const; +template NamedCollection::const_iterator NamedCollection::end() const; +template NamedCollection::const_iterator NamedCollection::end() const; } diff --git a/src/Storages/NamedCollections.h b/src/Storages/NamedCollections/NamedCollections.h similarity index 89% rename from src/Storages/NamedCollections.h rename to src/Storages/NamedCollections/NamedCollections.h index f7181c2b539..53b7a828a63 100644 --- a/src/Storages/NamedCollections.h +++ b/src/Storages/NamedCollections/NamedCollections.h @@ -1,7 +1,7 @@ #pragma once #include -#include -#include +#include +#include namespace Poco { namespace Util { class AbstractConfiguration; } } @@ -22,7 +22,7 @@ class NamedCollection { public: using Key = std::string; - using Keys = std::set; + using Keys = std::set>; using SourceId = NamedCollectionUtils::SourceId; static MutableNamedCollectionPtr create( @@ -49,6 +49,13 @@ public: Keys getKeys() const; + using iterator = typename Keys::iterator; + using const_iterator = typename Keys::const_iterator; + + template const_iterator begin() const; + + template const_iterator end() const; + std::string dumpStructure() const; bool isMutable() const { return is_mutable; } diff --git a/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp new file mode 100644 index 00000000000..cceabdfd7bf --- /dev/null +++ b/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp @@ -0,0 +1,112 @@ 
+#include "NamedCollectionsHelpers.h" +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + NamedCollectionPtr tryGetNamedCollectionFromASTs(ASTs asts) + { + if (asts.empty()) + return nullptr; + + const auto * identifier = asts[0]->as(); + if (!identifier) + return nullptr; + + const auto & collection_name = identifier->name(); + return NamedCollectionFactory::instance().tryGet(collection_name); + } + + std::optional> getKeyValueFromAST(ASTPtr ast) + { + const auto * function = ast->as(); + if (!function || function->name != "equals") + return std::nullopt; + + const auto * function_args_expr = assert_cast(function->arguments.get()); + const auto & function_args = function_args_expr->children; + + if (function_args.size() != 2) + return std::nullopt; + + auto literal_key = evaluateConstantExpressionOrIdentifierAsLiteral( + function_args[0], Context::getGlobalContextInstance()); + auto key = checkAndGetLiteralArgument(literal_key, "key"); + + auto literal_value = evaluateConstantExpressionOrIdentifierAsLiteral( + function_args[1], Context::getGlobalContextInstance()); + auto value = literal_value->as()->value; + + return std::pair{key, value}; + } +} + + +NamedCollectionPtr tryGetNamedCollectionWithOverrides(ASTs asts) +{ + if (asts.empty()) + return nullptr; + + auto collection = tryGetNamedCollectionFromASTs(asts); + if (!collection) + return nullptr; + + if (asts.size() == 1) + return collection; + + auto collection_copy = collection->duplicate(); + + for (const auto & ast : asts) + { + auto value_override = getKeyValueFromAST(ast); + if (!value_override) + continue; + + const auto & [key, value] = *value_override; + collection_copy->setOrUpdate(key, toString(value)); + } + + return collection_copy; +} + +void validateNamedCollection( + const NamedCollection & collection, + const std::unordered_set & required_keys, + const std::unordered_set & optional_keys) +{ + const auto & keys = collection.getKeys(); + for (const auto & key : keys) + { + if (!required_keys.contains(key) && !optional_keys.contains(key)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Unexpected key `{}` in named collection. Required keys: {}, optional keys: {}", + key, fmt::join(required_keys, ", "), fmt::join(optional_keys, ", ")); + } + } + + for (const auto & key : required_keys) + { + if (!keys.contains(key)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Key `{}` is required, but not specified. 
Required keys: {}, optional keys: {}", + key, fmt::join(required_keys, ", "), fmt::join(optional_keys, ", ")); + } + } +} + +} diff --git a/src/Storages/NamedCollections/NamedCollectionsHelpers.h b/src/Storages/NamedCollections/NamedCollectionsHelpers.h new file mode 100644 index 00000000000..39baafa9039 --- /dev/null +++ b/src/Storages/NamedCollections/NamedCollectionsHelpers.h @@ -0,0 +1,18 @@ +#pragma once +#include +#include +#include +#include + + +namespace DB +{ + +NamedCollectionPtr tryGetNamedCollectionWithOverrides(ASTs asts); + +void validateNamedCollection( + const NamedCollection & collection, + const std::unordered_set & required_keys, + const std::unordered_set & optional_keys); + +} diff --git a/src/Storages/NamedCollections_fwd.h b/src/Storages/NamedCollections/NamedCollections_fwd.h similarity index 100% rename from src/Storages/NamedCollections_fwd.h rename to src/Storages/NamedCollections/NamedCollections_fwd.h diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index afd7cf180a9..975ce114e83 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -14,7 +14,6 @@ #include #include -#include #include #include @@ -28,8 +27,14 @@ #include #include #include +#include +#include #include +#include +#include +#include + #include #include @@ -42,7 +47,6 @@ #include #include -#include #include @@ -64,8 +68,6 @@ namespace fs = std::filesystem; -static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - namespace ProfileEvents { extern const Event S3DeleteObjects; @@ -75,6 +77,28 @@ namespace ProfileEvents namespace DB { +static const String PARTITION_ID_WILDCARD = "{_partition_id}"; + +static const std::unordered_set required_configuration_keys = { + "url", +}; +static std::unordered_set optional_configuration_keys = { + "format", + "compression", + "compression_method", + "structure", + "access_key_id", + "secret_access_key", + "filename", + "use_environment_credentials", + "max_single_read_retries", + "min_upload_part_size", + "upload_part_size_multiply_factor", + "upload_part_size_multiply_parts_count_threshold", + "max_single_part_upload_size", + "max_connections", +}; + namespace ErrorCodes { extern const int CANNOT_PARSE_TEXT; @@ -90,9 +114,25 @@ namespace ErrorCodes class IOutputFormat; using OutputFormatPtr = std::shared_ptr; +static void addPathToVirtualColumns(Block & block, const String & path, size_t idx) +{ + if (block.has("_path")) + block.getByName("_path").column->assumeMutableRef().insert(path); + + if (block.has("_file")) + { + auto pos = path.find_last_of('/'); + assert(pos != std::string::npos); + + auto file = path.substr(pos + 1); + block.getByName("_file").column->assumeMutableRef().insert(file); + } + + block.getByName("_idx").column->assumeMutableRef().insert(idx); +} + class StorageS3Source::DisclosedGlobIterator::Impl : WithContext { - public: Impl( const Aws::S3::S3Client & client_, @@ -100,7 +140,7 @@ public: ASTPtr & query_, const Block & virtual_header_, ContextPtr context_, - std::unordered_map * object_infos_, + ObjectInfos * object_infos_, Strings * read_keys_, const S3Settings::RequestSettings & request_settings_) : WithContext(context_) @@ -111,6 +151,8 @@ public: , object_infos(object_infos_) , read_keys(read_keys_) , request_settings(request_settings_) + , list_objects_pool(1) + , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) throw Exception("Expression can not have wildcards inside bucket 
name", ErrorCodes::UNEXPECTED_EXPRESSION); @@ -120,35 +162,23 @@ public: /// We don't have to list bucket, because there is no asterisks. if (key_prefix.size() == globbed_uri.key.size()) { - buffer.emplace_back(globbed_uri.key); + buffer.emplace_back(globbed_uri.key, std::nullopt); buffer_iter = buffer.begin(); is_finished = true; return; } - /// Create a virtual block with one row to construct filter - if (query && virtual_header) - { - /// Append "key" column as the filter result - virtual_header.insert({ColumnString::create(), std::make_shared(), "_key"}); - - auto block = virtual_header.cloneEmpty(); - MutableColumns columns = block.mutateColumns(); - for (auto & column : columns) - column->insertDefault(); - block.setColumns(std::move(columns)); - VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast); - } - request.SetBucket(globbed_uri.bucket); request.SetPrefix(key_prefix); + outcome_future = listObjectsAsync(); + matcher = std::make_unique(makeRegexpPatternFromGlobs(globbed_uri.key)); recursive = globbed_uri.key == "/**" ? true : false; fillInternalBufferAssumeLocked(); } - String next() + KeyWithInfo next() { std::lock_guard lock(mutex); return nextAssumeLocked(); @@ -159,9 +189,15 @@ public: return total_size; } -private: + ~Impl() + { + list_objects_pool.wait(); + } - String nextAssumeLocked() +private: + using ListObjectsOutcome = Aws::S3::Model::ListObjectsV2Outcome; + + KeyWithInfo nextAssumeLocked() { if (buffer_iter != buffer.end()) { @@ -174,7 +210,6 @@ private: return {}; fillInternalBufferAssumeLocked(); - return nextAssumeLocked(); } @@ -182,8 +217,9 @@ private: { buffer.clear(); - ProfileEvents::increment(ProfileEvents::S3ListObjects); - outcome = client.ListObjectsV2(request); + assert(outcome_future.valid()); + auto outcome = outcome_future.get(); + if (!outcome.IsSuccess()) throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", quoteString(request.GetBucket()), quoteString(request.GetPrefix()), @@ -191,109 +227,136 @@ private: const auto & result_batch = outcome.GetResult().GetContents(); + /// It returns false when all objects were returned + is_finished = !outcome.GetResult().GetIsTruncated(); + + if (!is_finished) + { + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. 
+ list_objects_pool.wait(); + outcome_future = listObjectsAsync(); + } + + KeysWithInfo temp_buffer; + temp_buffer.reserve(result_batch.size()); + + for (const auto & row : result_batch) + { + String key = row.GetKey(); + if (recursive || re2::RE2::FullMatch(key, *matcher)) + { + S3::ObjectInfo info = + { + .size = size_t(row.GetSize()), + .last_modification_time = row.GetLastModified().Millis() / 1000, + }; + + if (object_infos) + (*object_infos)[fs::path(globbed_uri.bucket) / key] = info; + + temp_buffer.emplace_back(std::move(key), std::move(info)); + } + } + + if (temp_buffer.empty()) + { + buffer_iter = buffer.begin(); + return; + } + + if (!is_initialized) + { + createFilterAST(temp_buffer.front().key); + is_initialized = true; + } + if (filter_ast) { auto block = virtual_header.cloneEmpty(); - MutableColumnPtr path_column; - MutableColumnPtr file_column; - MutableColumnPtr key_column = block.getByName("_key").column->assumeMutable(); - - if (block.has("_path")) - path_column = block.getByName("_path").column->assumeMutable(); - - if (block.has("_file")) - file_column = block.getByName("_file").column->assumeMutable(); - - std::unordered_map all_object_infos; - for (const auto & key_info : result_batch) - { - const String & key = key_info.GetKey(); - if (recursive || re2::RE2::FullMatch(key, *matcher)) - { - String path = fs::path(globbed_uri.bucket) / key; - const size_t key_size = key_info.GetSize(); - - all_object_infos.emplace(path, S3::ObjectInfo{.size = key_size, .last_modification_time = key_info.GetLastModified().Millis() / 1000}); - - if (path_column) - { - path_column->insert(path); - } - if (file_column) - { - String file = path.substr(path.find_last_of('/') + 1); - file_column->insert(file); - } - - key_column->insert(key); - } - } + for (size_t i = 0; i < temp_buffer.size(); ++i) + addPathToVirtualColumns(block, fs::path(globbed_uri.bucket) / temp_buffer[i].key, i); VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); - const ColumnString & keys = typeid_cast(*block.getByName("_key").column); - size_t rows = block.rows(); - buffer.reserve(rows); - for (size_t i = 0; i < rows; ++i) + const auto & idxs = typeid_cast(*block.getByName("_idx").column); + + buffer.reserve(block.rows()); + for (UInt64 idx : idxs.getData()) { - auto key = keys.getDataAt(i).toString(); - std::string path = fs::path(globbed_uri.bucket) / key; - - const auto & object_info = all_object_infos.at(path); - total_size += object_info.size; - if (object_infos) - object_infos->emplace(path, object_info); - - buffer.emplace_back(key); + total_size += temp_buffer[idx].info->size; + buffer.emplace_back(std::move(temp_buffer[idx])); } } else { - buffer.reserve(result_batch.size()); - for (const auto & key_info : result_batch) - { - String key = key_info.GetKey(); - if (recursive || re2::RE2::FullMatch(key, *matcher)) - { - const size_t key_size = key_info.GetSize(); - total_size += key_size; - if (object_infos) - { - const std::string path = fs::path(globbed_uri.bucket) / key; - (*object_infos)[path] = {.size = key_size, .last_modification_time = key_info.GetLastModified().Millis() / 1000}; - } - buffer.emplace_back(std::move(key)); - } - } + buffer = std::move(temp_buffer); + for (const auto & [_, info] : buffer) + total_size += info->size; } /// Set iterator only after the whole batch is processed buffer_iter = buffer.begin(); - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - - /// It returns false when all objects were returned - is_finished = 
!outcome.GetResult().GetIsTruncated(); - if (read_keys) - read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); + { + read_keys->reserve(read_keys->size() + buffer.size()); + for (const auto & [key, _] : buffer) + read_keys->push_back(key); + } + } + + void createFilterAST(const String & any_key) + { + if (!query || !virtual_header) + return; + + /// Create a virtual block with one row to construct filter + /// Append "idx" column as the filter result + virtual_header.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); + + auto block = virtual_header.cloneEmpty(); + addPathToVirtualColumns(block, fs::path(globbed_uri.bucket) / any_key, 0); + VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast); + } + + std::future listObjectsAsync() + { + return list_objects_scheduler([this] + { + ProfileEvents::increment(ProfileEvents::S3ListObjects); + auto outcome = client.ListObjectsV2(request); + + /// Outcome failure will be handled on the caller side. + if (outcome.IsSuccess()) + request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); + + return outcome; + }, 0); } std::mutex mutex; - Strings buffer; - Strings::iterator buffer_iter; + + KeysWithInfo buffer; + KeysWithInfo::iterator buffer_iter; + Aws::S3::S3Client client; S3::URI globbed_uri; ASTPtr query; Block virtual_header; + bool is_initialized{false}; ASTPtr filter_ast; - Aws::S3::Model::ListObjectsV2Request request; - Aws::S3::Model::ListObjectsV2Outcome outcome; std::unique_ptr matcher; bool recursive{false}; bool is_finished{false}; - std::unordered_map * object_infos; + ObjectInfos * object_infos; Strings * read_keys; + + Aws::S3::Model::ListObjectsV2Request request; S3Settings::RequestSettings request_settings; + + ThreadPool list_objects_pool; + ThreadPoolCallbackRunner list_objects_scheduler; + std::future outcome_future; size_t total_size = 0; }; @@ -303,14 +366,14 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( ASTPtr query, const Block & virtual_header, ContextPtr context, - std::unordered_map * object_infos_, + ObjectInfos * object_infos_, Strings * read_keys_, const S3Settings::RequestSettings & request_settings_) : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, object_infos_, read_keys_, request_settings_)) { } -String StorageS3Source::DisclosedGlobIterator::next() +StorageS3Source::KeyWithInfo StorageS3Source::DisclosedGlobIterator::next() { return pimpl->next(); } @@ -331,24 +394,23 @@ public: ASTPtr query_, const Block & virtual_header_, ContextPtr context_, - std::unordered_map * object_infos_) + ObjectInfos * object_infos_, + Strings * read_keys_) : WithContext(context_) - , keys(keys_) , bucket(bucket_) , query(query_) , virtual_header(virtual_header_) { + Strings all_keys = keys_; + /// Create a virtual block with one row to construct filter - if (query && virtual_header) + if (query && virtual_header && !all_keys.empty()) { - /// Append "key" column as the filter result - virtual_header.insert({ColumnString::create(), std::make_shared(), "_key"}); + /// Append "idx" column as the filter result + virtual_header.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); auto block = virtual_header.cloneEmpty(); - MutableColumns columns = block.mutateColumns(); - for (auto & column : columns) - column->insertDefault(); - block.setColumns(std::move(columns)); + addPathToVirtualColumns(block, fs::path(bucket) / all_keys.front(), 0); ASTPtr filter_ast; 
VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast); @@ -356,71 +418,49 @@ public: if (filter_ast) { block = virtual_header.cloneEmpty(); - MutableColumnPtr path_column; - MutableColumnPtr file_column; - MutableColumnPtr key_column = block.getByName("_key").column->assumeMutable(); - - if (block.has("_path")) - path_column = block.getByName("_path").column->assumeMutable(); - - if (block.has("_file")) - file_column = block.getByName("_file").column->assumeMutable(); - - std::unordered_map all_object_infos; - for (const auto & key : keys) - { - const String path = fs::path(bucket) / key; - - /// To avoid extra requests update total_size only if object_infos != nullptr - /// (which means we eventually need this info anyway, so it should be ok to do it now). - if (object_infos_) - { - auto key_info = S3::getObjectInfo(client_, bucket, key, version_id_, true, false); - all_object_infos.emplace(path, S3::ObjectInfo{.size = key_info.size, .last_modification_time = key_info.last_modification_time}); - } - - if (path_column) - { - path_column->insert(path); - } - if (file_column) - { - const String file = path.substr(path.find_last_of('/') + 1); - file_column->insert(file); - } - key_column->insert(key); - } + for (size_t i = 0; i < all_keys.size(); ++i) + addPathToVirtualColumns(block, fs::path(bucket) / all_keys[i], i); VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); - const ColumnString & keys_col = typeid_cast(*block.getByName("_key").column); - size_t rows = block.rows(); + const auto & idxs = typeid_cast(*block.getByName("_idx").column); + Strings filtered_keys; - filtered_keys.reserve(rows); - for (size_t i = 0; i < rows; ++i) - { - auto key = keys_col.getDataAt(i).toString(); + filtered_keys.reserve(block.rows()); + for (UInt64 idx : idxs.getData()) + filtered_keys.emplace_back(std::move(all_keys[idx])); - if (object_infos_) - { - std::string path = fs::path(bucket) / key; - const auto & object_info = all_object_infos.at(path); - total_size += object_info.size; - object_infos_->emplace(path, object_info); - } - - filtered_keys.emplace_back(key); - } - - keys = std::move(filtered_keys); + all_keys = std::move(filtered_keys); } } + + if (read_keys_) + *read_keys_ = all_keys; + + for (auto && key : all_keys) + { + std::optional info; + + /// To avoid extra requests update total_size only if object_infos != nullptr + /// (which means we eventually need this info anyway, so it should be ok to do it now) + if (object_infos_) + { + info = S3::getObjectInfo(client_, bucket, key, version_id_, true, false); + total_size += info->size; + + String path = fs::path(bucket) / key; + (*object_infos_)[std::move(path)] = *info; + } + + keys.emplace_back(std::move(key), std::move(info)); + } } - String next() + KeyWithInfo next() { size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= keys.size()) - return ""; + return {}; + return keys[current_index]; } @@ -430,7 +470,7 @@ public: } private: - Strings keys; + KeysWithInfo keys; std::atomic_size_t index = 0; String bucket; @@ -448,13 +488,15 @@ StorageS3Source::KeysIterator::KeysIterator( ASTPtr query, const Block & virtual_header, ContextPtr context, - std::unordered_map * object_infos_) + ObjectInfos * object_infos, + Strings * read_keys) : pimpl(std::make_shared( - client_, version_id_, keys_, bucket_, query, virtual_header, context, object_infos_)) + client_, version_id_, keys_, bucket_, query, + virtual_header, context, object_infos, read_keys)) { 
} -String StorageS3Source::KeysIterator::next() +StorageS3Source::KeyWithInfo StorageS3Source::KeysIterator::next() { return pimpl->next(); } @@ -487,8 +529,7 @@ StorageS3Source::StorageS3Source( const String & bucket_, const String & version_id_, std::shared_ptr file_iterator_, - const size_t download_thread_num_, - const std::unordered_map & object_infos_) + const size_t download_thread_num_) : ISource(getHeader(sample_block_, requested_virtual_columns_)) , WithContext(context_) , name(std::move(name_)) @@ -505,9 +546,12 @@ StorageS3Source::StorageS3Source( , requested_virtual_columns(requested_virtual_columns_) , file_iterator(file_iterator_) , download_thread_num(download_thread_num_) - , object_infos(object_infos_) + , create_reader_pool(1) + , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "CreateS3Reader")) { - initialize(); + reader = createReader(); + if (reader) + reader_future = createReaderAsync(); } @@ -519,17 +563,21 @@ void StorageS3Source::onCancel() } -bool StorageS3Source::initialize() +StorageS3Source::ReaderHolder StorageS3Source::createReader() { - String current_key = (*file_iterator)(); + auto [current_key, info] = (*file_iterator)(); if (current_key.empty()) - return false; + return {}; - file_path = fs::path(bucket) / current_key; + size_t object_size = info + ? info->size + : S3::getObjectSize(*client, bucket, current_key, version_id, true, false); int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - read_buf = wrapReadBufferWithCompressionMethod( - createS3ReadBuffer(current_key), chooseCompressionMethod(current_key, compression_hint), zstd_window_log_max); + auto read_buf = wrapReadBufferWithCompressionMethod( + createS3ReadBuffer(current_key, object_size), + chooseCompressionMethod(current_key, compression_hint), + zstd_window_log_max); auto input_format = getContext()->getInputFormat(format, *read_buf, sample_block, max_block_size, format_settings); QueryPipelineBuilder builder; @@ -542,32 +590,36 @@ bool StorageS3Source::initialize() { return std::make_shared(header, columns_desc, *input_format, getContext()); }); } - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - reader = std::make_unique(*pipeline); + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + auto current_reader = std::make_unique(*pipeline); - return true; + return ReaderHolder{fs::path(bucket) / current_key, std::move(read_buf), std::move(pipeline), std::move(current_reader)}; } -std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & key) +std::future StorageS3Source::createReaderAsync() { - size_t object_size; - auto it = object_infos.find(fs::path(bucket) / key); - if (it != object_infos.end()) - object_size = it->second.size; - else - object_size = DB::S3::getObjectSize(*client, bucket, key, version_id, false, false); + return create_reader_scheduler([this] { return createReader(); }, 0); +} + +std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & key, size_t object_size) +{ + auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); + read_settings.enable_filesystem_cache = false; auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; const bool use_parallel_download = download_buffer_size > 0 && download_thread_num > 1; const bool object_too_small = object_size < download_thread_num * download_buffer_size; + if (!use_parallel_download || object_too_small) { LOG_TRACE(log, 
"Downloading object of size {} from S3 in single thread", object_size); - return std::make_unique(client, bucket, key, version_id, request_settings, getContext()->getReadSettings()); + if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) + return createAsyncS3ReadBuffer(key, read_settings, object_size); + + return std::make_unique(client, bucket, key, version_id, request_settings, read_settings); } assert(object_size > 0); - if (download_buffer_size < DBMS_DEFAULT_BUFFER_SIZE) { LOG_WARNING(log, "Downloading buffer {} bytes too small, set at least {} bytes", download_buffer_size, DBMS_DEFAULT_BUFFER_SIZE); @@ -575,13 +627,55 @@ std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & k } auto factory = std::make_unique( - client, bucket, key, version_id, download_buffer_size, object_size, request_settings, getContext()->getReadSettings()); - LOG_TRACE( - log, "Downloading from S3 in {} threads. Object size: {}, Range size: {}.", download_thread_num, object_size, download_buffer_size); + client, bucket, key, version_id, download_buffer_size, object_size, request_settings, read_settings); + + LOG_TRACE(log, + "Downloading from S3 in {} threads. Object size: {}, Range size: {}.", + download_thread_num, object_size, download_buffer_size); return std::make_unique(std::move(factory), threadPoolCallbackRunner(IOThreadPool::get(), "S3ParallelRead"), download_thread_num); } +std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( + const String & key, const ReadSettings & read_settings, size_t object_size) +{ + auto read_buffer_creator = + [this, read_settings] + (const std::string & path, size_t read_until_position) -> std::shared_ptr + { + return std::make_shared( + client, + bucket, + path, + version_id, + request_settings, + read_settings, + /* use_external_buffer */true, + /* offset */0, + read_until_position, + /* restricted_seek */true); + }; + + auto s3_impl = std::make_unique( + std::move(read_buffer_creator), + StoredObjects{StoredObject{key, object_size}}, + read_settings); + + auto & pool_reader = getContext()->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); + auto async_reader = std::make_unique(pool_reader, read_settings, std::move(s3_impl)); + + async_reader->setReadUntilEnd(); + if (read_settings.remote_fs_prefetch) + async_reader->prefetch(); + + return async_reader; +} + +StorageS3Source::~StorageS3Source() +{ + create_reader_pool.wait(); +} + String StorageS3Source::getName() const { return name; @@ -599,11 +693,12 @@ Chunk StorageS3Source::generate() { UInt64 num_rows = chunk.getNumRows(); - auto it = object_infos.find(file_path); - if (num_rows && it != object_infos.end()) + const auto & file_path = reader.getPath(); + size_t total_size = file_iterator->getTotalSize(); + if (num_rows && total_size) { updateRowsProgressApprox( - *this, chunk, file_iterator->getTotalSize(), total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); + *this, chunk, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); } for (const auto & virtual_column : requested_virtual_columns) @@ -625,52 +720,22 @@ Chunk StorageS3Source::generate() { std::lock_guard lock(reader_mutex); - reader.reset(); - pipeline.reset(); - read_buf.reset(); - if (!initialize()) + assert(reader_future.valid()); + reader = reader_future.get(); + + if (!reader) break; + + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. 
+ create_reader_pool.wait(); + reader_future = createReaderAsync(); } } return {}; } -static bool checkIfObjectExists(const std::shared_ptr & client, const String & bucket, const String & key) -{ - bool is_finished = false; - Aws::S3::Model::ListObjectsV2Request request; - Aws::S3::Model::ListObjectsV2Outcome outcome; - - request.SetBucket(bucket); - request.SetPrefix(key); - while (!is_finished) - { - ProfileEvents::increment(ProfileEvents::S3ListObjects); - outcome = client->ListObjectsV2(request); - if (!outcome.IsSuccess()) - throw Exception( - ErrorCodes::S3_ERROR, - "Could not list objects in bucket {} with key {}, S3 exception: {}, message: {}", - quoteString(bucket), - quoteString(key), - backQuote(outcome.GetError().GetExceptionName()), - quoteString(outcome.GetError().GetMessage())); - - const auto & result_batch = outcome.GetResult().GetContents(); - for (const auto & obj : result_batch) - { - if (obj.GetKey() == key) - return true; - } - - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - is_finished = !outcome.GetResult().GetIsTruncated(); - } - - return false; -} - class StorageS3Sink : public SinkToStorage { public: @@ -880,7 +945,9 @@ StorageS3::StorageS3( distributed_processing_, is_key_with_globs, format_settings, - context_); + context_, + &object_infos); + storage_metadata.setColumns(columns); } else @@ -908,7 +975,7 @@ std::shared_ptr StorageS3::createFileIterator( ContextPtr local_context, ASTPtr query, const Block & virtual_block, - std::unordered_map * object_infos, + ObjectInfos * object_infos, Strings * read_keys) { if (distributed_processing) @@ -924,11 +991,10 @@ std::shared_ptr StorageS3::createFileIterator( } else { - if (read_keys) - *read_keys = keys; - return std::make_shared( - *s3_configuration.client, s3_configuration.uri.version_id, keys, s3_configuration.uri.bucket, query, virtual_block, local_context, object_infos); + *s3_configuration.client, s3_configuration.uri.version_id, keys, + s3_configuration.uri.bucket, query, virtual_block, local_context, + object_infos, read_keys); } } @@ -1020,9 +1086,9 @@ Pipe StorageS3::read( s3_configuration.uri.bucket, s3_configuration.uri.version_id, iterator_wrapper, - max_download_threads, - object_infos)); + max_download_threads)); } + auto pipe = Pipe::unitePipes(std::move(pipes)); narrowPipe(pipe, num_streams); @@ -1061,7 +1127,7 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr bool truncate_in_insert = local_context->getSettingsRef().s3_truncate_on_insert; - if (!truncate_in_insert && checkIfObjectExists(s3_configuration.client, s3_configuration.uri.bucket, keys.back())) + if (!truncate_in_insert && S3::objectExists(*s3_configuration.client, s3_configuration.uri.bucket, keys.back(), s3_configuration.uri.version_id)) { if (local_context->getSettingsRef().s3_create_new_file_on_insert) { @@ -1073,7 +1139,7 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr new_key = keys[0].substr(0, pos) + "." + std::to_string(index) + (pos == std::string::npos ? 
"" : keys[0].substr(pos)); ++index; } - while (checkIfObjectExists(s3_configuration.client, s3_configuration.uri.bucket, new_key)); + while (S3::objectExists(*s3_configuration.client, s3_configuration.uri.bucket, new_key, s3_configuration.uri.version_id)); keys.push_back(new_key); } else @@ -1097,7 +1163,6 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr } } - void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) { updateS3Configuration(local_context, s3_configuration); @@ -1175,48 +1240,60 @@ void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration upd.auth_settings.use_insecure_imds_request.value_or(ctx->getConfigRef().getBool("s3.use_insecure_imds_request", false))); } - -void StorageS3::processNamedCollectionResult(StorageS3Configuration & configuration, const std::vector> & key_value_args) +void StorageS3::processNamedCollectionResult(StorageS3Configuration & configuration, const NamedCollection & collection) { - for (const auto & [arg_name, arg_value] : key_value_args) - { - if (arg_name == "access_key_id") - configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(arg_value, "access_key_id"); - else if (arg_name == "secret_access_key") - configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(arg_value, "secret_access_key"); - else if (arg_name == "filename") - configuration.url = std::filesystem::path(configuration.url) / checkAndGetLiteralArgument(arg_value, "filename"); - else if (arg_name == "use_environment_credentials") - configuration.auth_settings.use_environment_credentials = checkAndGetLiteralArgument(arg_value, "use_environment_credentials"); - else if (arg_name == "max_single_read_retries") - configuration.request_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_single_read_retries"); - else if (arg_name == "min_upload_part_size") - configuration.request_settings.min_upload_part_size = checkAndGetLiteralArgument(arg_value, "min_upload_part_size"); - else if (arg_name == "upload_part_size_multiply_factor") - configuration.request_settings.upload_part_size_multiply_factor = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_factor"); - else if (arg_name == "upload_part_size_multiply_parts_count_threshold") - configuration.request_settings.upload_part_size_multiply_parts_count_threshold = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_parts_count_threshold"); - else if (arg_name == "max_single_part_upload_size") - configuration.request_settings.max_single_part_upload_size = checkAndGetLiteralArgument(arg_value, "max_single_part_upload_size"); - else if (arg_name == "max_connections") - configuration.request_settings.max_connections = checkAndGetLiteralArgument(arg_value, "max_connections"); - else - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Unknown key-value argument `{}` for StorageS3, expected: url, [access_key_id, secret_access_key], name of used format and [compression_method].", - arg_name); - } -} + validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + std::string filename; + for (const auto & key : collection) + { + if (key == "url") + configuration.url = collection.get(key); + else if (key == "access_key_id") + configuration.auth_settings.access_key_id = collection.get(key); + else if (key == "secret_access_key") + configuration.auth_settings.secret_access_key = 
collection.get(key); + else if (key == "filename") + filename = collection.get(key); + else if (key == "format") + configuration.format = collection.get(key); + else if (key == "compression" || key == "compression_method") + configuration.compression_method = collection.get(key); + else if (key == "structure") + configuration.structure = collection.get(key); + else if (key == "use_environment_credentials") + configuration.auth_settings.use_environment_credentials = collection.get(key); + else if (key == "max_single_read_retries") + configuration.request_settings.max_single_read_retries = collection.get(key); + else if (key == "min_upload_part_size") + configuration.request_settings.min_upload_part_size = collection.get(key); + else if (key == "upload_part_size_multiply_factor") + configuration.request_settings.upload_part_size_multiply_factor = collection.get(key); + else if (key == "upload_part_size_multiply_parts_count_threshold") + configuration.request_settings.upload_part_size_multiply_parts_count_threshold = collection.get(key); + else if (key == "max_single_part_upload_size") + configuration.request_settings.max_single_part_upload_size = collection.get(key); + else if (key == "max_connections") + configuration.request_settings.max_connections = collection.get(key); + else + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Unknown configuration key `{}` for StorageS3, " + "expected: url, [access_key_id, secret_access_key], " + "name of used format and [compression_method].", + key); + } + if (!filename.empty()) + configuration.url = std::filesystem::path(configuration.url) / filename; +} StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPtr local_context) { StorageS3Configuration configuration; - if (auto named_collection = getURLBasedDataSourceConfiguration(engine_args, local_context)) + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args)) { - auto [common_configuration, storage_specific_args] = named_collection.value(); - configuration.set(common_configuration); - processNamedCollectionResult(configuration, storage_specific_args); + processNamedCollectionResult(configuration, *named_collection); } else { @@ -1270,7 +1347,7 @@ ColumnsDescription StorageS3::getTableStructureFromData( bool distributed_processing, const std::optional & format_settings, ContextPtr ctx, - std::unordered_map * object_infos) + ObjectInfos * object_infos) { S3Configuration s3_configuration{ configuration.url, @@ -1293,12 +1370,17 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( bool is_key_with_globs, const std::optional & format_settings, ContextPtr ctx, - std::unordered_map * object_infos) + ObjectInfos * object_infos) { std::vector read_keys; - auto file_iterator - = createFileIterator(s3_configuration, {s3_configuration.uri.key}, is_key_with_globs, distributed_processing, ctx, nullptr, {}, object_infos, &read_keys); + auto file_iterator = createFileIterator( + s3_configuration, + {s3_configuration.uri.key}, + is_key_with_globs, + distributed_processing, + ctx, nullptr, + {}, object_infos, &read_keys); std::optional columns_from_cache; size_t prev_read_keys_size = read_keys.size(); @@ -1307,7 +1389,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( ReadBufferIterator read_buffer_iterator = [&, first = true](ColumnsDescription & cached_columns) mutable -> std::unique_ptr { - auto key = (*file_iterator)(); + auto [key, _] = (*file_iterator)(); if (key.empty()) { @@ -1448,7 +1530,7 @@ std::optional 
StorageS3::tryGetColumnsFromCache( const Strings::const_iterator & begin, const Strings::const_iterator & end, const S3Configuration & s3_configuration, - std::unordered_map * object_infos, + ObjectInfos * object_infos, const String & format_name, const std::optional & format_settings, const ContextPtr & ctx) @@ -1476,6 +1558,7 @@ std::optional StorageS3::tryGetColumnsFromCache( if (info.last_modification_time) return info.last_modification_time; + return std::nullopt; }; diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index fa58ff9809e..671610173bd 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -12,11 +12,13 @@ #include #include +#include #include #include #include #include #include +#include #include #include @@ -30,17 +32,34 @@ namespace DB class PullingPipelineExecutor; class StorageS3SequentialSource; +class NamedCollection; + class StorageS3Source : public ISource, WithContext { public: + + struct KeyWithInfo + { + KeyWithInfo() = default; + KeyWithInfo(String key_, std::optional info_) + : key(std::move(key_)), info(std::move(info_)) + { + } + + String key; + std::optional info; + }; + + using KeysWithInfo = std::vector; + using ObjectInfos = std::unordered_map; class IIterator { public: virtual ~IIterator() = default; - virtual String next() = 0; + virtual KeyWithInfo next() = 0; virtual size_t getTotalSize() const = 0; - String operator ()() { return next(); } + KeyWithInfo operator ()() { return next(); } }; class DisclosedGlobIterator : public IIterator @@ -52,12 +71,11 @@ public: ASTPtr query, const Block & virtual_header, ContextPtr context, - std::unordered_map * object_infos = nullptr, + ObjectInfos * object_infos = nullptr, Strings * read_keys_ = nullptr, const S3Settings::RequestSettings & request_settings_ = {}); - String next() override; - + KeyWithInfo next() override; size_t getTotalSize() const override; private: @@ -77,10 +95,10 @@ public: ASTPtr query, const Block & virtual_header, ContextPtr context, - std::unordered_map * object_infos = nullptr); - - String next() override; + ObjectInfos * object_infos = nullptr, + Strings * read_keys = nullptr); + KeyWithInfo next() override; size_t getTotalSize() const override; private: @@ -94,7 +112,7 @@ public: public: explicit ReadTaskIterator(const ReadTaskCallback & callback_) : callback(callback_) {} - String next() override { return callback(); } + KeyWithInfo next() override { return {callback(), {}}; } size_t getTotalSize() const override { return 0; } @@ -119,8 +137,9 @@ public: const String & bucket, const String & version_id, std::shared_ptr file_iterator_, - size_t download_thread_num, - const std::unordered_map & object_infos_); + size_t download_thread_num); + + ~StorageS3Source() override; String getName() const override; @@ -132,7 +151,6 @@ private: String name; String bucket; String version_id; - String file_path; String format; ColumnsDescription columns_desc; UInt64 max_block_size; @@ -142,10 +160,37 @@ private: Block sample_block; std::optional format_settings; + struct ReaderHolder + { + public: + ReaderHolder( + String path_, + std::unique_ptr read_buf_, + std::unique_ptr pipeline_, + std::unique_ptr reader_) + : path(std::move(path_)) + , read_buf(std::move(read_buf_)) + , pipeline(std::move(pipeline_)) + , reader(std::move(reader_)) + { + } + + ReaderHolder() = default; + + explicit operator bool() const { return reader != nullptr; } + PullingPipelineExecutor * operator->() { return reader.get(); } + const PullingPipelineExecutor * operator->() const { return 
reader.get(); } + const String & getPath() const { return path; } + + private: + String path; + std::unique_ptr read_buf; + std::unique_ptr pipeline; + std::unique_ptr reader; + }; + + ReaderHolder reader; - std::unique_ptr read_buf; - std::unique_ptr pipeline; - std::unique_ptr reader; /// onCancel and generate can be called concurrently std::mutex reader_mutex; std::vector requested_virtual_columns; @@ -154,16 +199,20 @@ private: Poco::Logger * log = &Poco::Logger::get("StorageS3Source"); + ThreadPool create_reader_pool; + ThreadPoolCallbackRunner create_reader_scheduler; + std::future reader_future; + UInt64 total_rows_approx_max = 0; size_t total_rows_count_times = 0; UInt64 total_rows_approx_accumulated = 0; - std::unordered_map object_infos; - /// Recreate ReadBuffer and Pipeline for each file. - bool initialize(); + ReaderHolder createReader(); + std::future createReaderAsync(); - std::unique_ptr createS3ReadBuffer(const String & key); + std::unique_ptr createS3ReadBuffer(const String & key, size_t object_size); + std::unique_ptr createAsyncS3ReadBuffer(const String & key, const ReadSettings & read_settings, size_t object_size); }; /** @@ -209,14 +258,16 @@ public: static StorageS3Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); + using ObjectInfos = StorageS3Source::ObjectInfos; + static ColumnsDescription getTableStructureFromData( const StorageS3Configuration & configuration, bool distributed_processing, const std::optional & format_settings, ContextPtr ctx, - std::unordered_map * object_infos = nullptr); + ObjectInfos * object_infos = nullptr); - static void processNamedCollectionResult(StorageS3Configuration & configuration, const std::vector> & key_value_args); + static void processNamedCollectionResult(StorageS3Configuration & configuration, const NamedCollection & collection); struct S3Configuration { @@ -266,7 +317,7 @@ private: ASTPtr partition_by; bool is_key_with_globs = false; - std::unordered_map object_infos; + ObjectInfos object_infos; static void updateS3Configuration(ContextPtr, S3Configuration &); @@ -278,7 +329,7 @@ private: ContextPtr local_context, ASTPtr query, const Block & virtual_block, - std::unordered_map * object_infos = nullptr, + ObjectInfos * object_infos = nullptr, Strings * read_keys = nullptr); static ColumnsDescription getTableStructureFromDataImpl( @@ -289,7 +340,7 @@ private: bool is_key_with_globs, const std::optional & format_settings, ContextPtr ctx, - std::unordered_map * object_infos = nullptr); + ObjectInfos * object_infos = nullptr); bool supportsSubcolumns() const override; @@ -299,7 +350,7 @@ private: const Strings::const_iterator & begin, const Strings::const_iterator & end, const S3Configuration & s3_configuration, - std::unordered_map * object_infos, + ObjectInfos * object_infos, const String & format_name, const std::optional & format_settings, const ContextPtr & ctx); diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index b6d714cc1cc..86eb5a2702c 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -102,7 +102,8 @@ Pipe StorageS3Cluster::read( auto iterator = std::make_shared( *s3_configuration.client, s3_configuration.uri, query_info.query, virtual_block, context); - auto callback = std::make_shared>([iterator]() mutable -> String { return iterator->next(); }); + + auto callback = std::make_shared>([iterator]() mutable -> String { return iterator->next().key; }); /// Calculate the header. 
This is significant, because some columns could be thrown away in some cases like query with count(*) auto interpreter = InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze()); diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index a55d7ad3c09..4f3003e68b0 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -123,9 +124,19 @@ void StorageView::read( } auto options = SelectQueryOptions(QueryProcessingStage::Complete, 0, false, query_info.settings_limit_offset_done); - InterpreterSelectWithUnionQuery interpreter(current_inner_query, context, options, column_names); - interpreter.addStorageLimits(*query_info.storage_limits); - interpreter.buildQueryPlan(query_plan); + + if (context->getSettingsRef().allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter(current_inner_query, options, context); + interpreter.addStorageLimits(*query_info.storage_limits); + query_plan = std::move(interpreter).extractQueryPlan(); + } + else + { + InterpreterSelectWithUnionQuery interpreter(current_inner_query, context, options, column_names); + interpreter.addStorageLimits(*query_info.storage_limits); + interpreter.buildQueryPlan(query_plan); + } /// It's expected that the columns read from storage are not constant. /// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery. diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index f5b6829c7ef..f69f9f8ee7f 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -94,6 +94,7 @@ const char * auto_contributors[] { "Aliaksandr Shylau", "Alina Terekhova", "Amesaru", + "Amila Welihinda", "Amir Vaza", "Amos Bird", "Amr Alaa", @@ -174,6 +175,7 @@ const char * auto_contributors[] { "Avogar", "Azat Khuzhin", "BSD_Conqueror", + "BSWaterB", "Babacar Diassé", "Bakhtiyor Ruziev", "BanyRule", @@ -186,6 +188,7 @@ const char * auto_contributors[] { "Bharat Nallan", "Bharat Nallan Chakravarthy", "Big Elephant", + "BigRedEye", "Bill", "BiteTheDDDDt", "BlahGeek", @@ -203,6 +206,7 @@ const char * auto_contributors[] { "Brett Hoerner", "Brian Hunter", "Bulat Gaifullin", + "Camden Cheek", "Camilo Sierra", "Carbyn", "Carlos Rodríguez Hernández", @@ -291,6 +295,7 @@ const char * auto_contributors[] { "Eldar Zaitov", "Elena", "Elena Baskakova", + "Elena Torró", "Elghazal Ahmed", "Elizaveta Mironyuk", "Elykov Alexandr", @@ -525,6 +530,7 @@ const char * auto_contributors[] { "Maksim Kita", "Mallik Hassan", "Malte", + "Manuel de la Peña", "Marat IDRISOV", "Marcelo Rodriguez", "Marek Vavrusa", @@ -534,6 +540,7 @@ const char * auto_contributors[] { "Mark Andreev", "Mark Frost", "Mark Papadakis", + "Mark Polokhov", "Maroun Maroun", "Marquitos", "Marsel Arduanov", @@ -709,6 +716,7 @@ const char * auto_contributors[] { "Quanfa Fu", "Quid37", "Radistka-75", + "Raevsky Rudolf", "Rafael Acevedo", "Rafael David Tinoco", "Rajkumar", @@ -779,6 +787,7 @@ const char * auto_contributors[] { "Sergey Mirvoda", "Sergey Ryzhkov", "Sergey Shtykov", + "Sergey Skvortsov", "Sergey Tulentsev", "Sergey V. 
Galtsev", "Sergey Zaikin", @@ -790,6 +799,7 @@ const char * auto_contributors[] { "Sherry Wang", "Shoh Jahon", "SiderZhang", + "Sidorov Pavel", "Silviu Caragea", "Simeon Emanuilov", "Simon Liu", @@ -878,6 +888,7 @@ const char * auto_contributors[] { "Viktor Taranenko", "Vincent Bernat", "Vitalii S", + "Vitaliy", "Vitaliy Fedorchenko", "Vitaliy Karnienko", "Vitaliy Kozlovskiy", @@ -922,6 +933,7 @@ const char * auto_contributors[] { "Weiqing Xu", "William Shallum", "Winter Zhang", + "Xbitz29", "XenoAmess", "Xianda Ke", "Xiang Zhou", @@ -1013,6 +1025,7 @@ const char * auto_contributors[] { "benbiti", "bgranvea", "bharatnc", + "bit-ranger", "bkuschel", "blazerer", "bluebirddm", @@ -1238,6 +1251,7 @@ const char * auto_contributors[] { "luc1ph3r", "lulichao", "luocongkai", + "lzydmxy", "m-ves", "madianjun", "maiha", @@ -1313,6 +1327,7 @@ const char * auto_contributors[] { "peter279k", "philip.han", "pingyu", + "pkubaj", "potya", "presto53", "proller", @@ -1378,6 +1393,7 @@ const char * auto_contributors[] { "taiyang-li", "tangjiangling", "tao jiang", + "taofengliu", "taojiatao", "tavplubix", "tchepavel", @@ -1394,6 +1410,7 @@ const char * auto_contributors[] { "turbo jason", "tyrionhuang", "ubuntu", + "unbyte", "unegare", "unknown", "urgordeadbeef", @@ -1481,6 +1498,7 @@ const char * auto_contributors[] { "Дмитрий Канатников", "Иванов Евгений", "Илья Исаев", + "Коренберг ☢️ Марк", "Павел Литвиненко", "Смитюх Вячеслав", "Сундуков Алексей", diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 2353be9b69f..432d2c4ac64 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -46,7 +46,7 @@ static String getEngineFull(const DatabasePtr & database) break; /// Database was dropped - if (!locked_database && name == database->getDatabaseName()) + if (name == database->getDatabaseName()) return {}; guard.reset(); diff --git a/src/Storages/System/StorageSystemNamedCollections.cpp b/src/Storages/System/StorageSystemNamedCollections.cpp index 6f4078369d2..16c259796e6 100644 --- a/src/Storages/System/StorageSystemNamedCollections.cpp +++ b/src/Storages/System/StorageSystemNamedCollections.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 0be44219c7d..b205b7c224d 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -1,4 +1,7 @@ #include "StorageSystemParts.h" +#include +#include +#include #include #include @@ -15,6 +18,29 @@ #include #include +namespace +{ +std::string_view getRemovalStateDescription(DB::DataPartRemovalState state) +{ + switch (state) + { + case DB::DataPartRemovalState::NOT_ATTEMPTED: + return "Cleanup thread hasn't seen this part yet"; + case DB::DataPartRemovalState::VISIBLE_TO_TRANSACTIONS: + return "Part maybe visible for transactions"; + case DB::DataPartRemovalState::NON_UNIQUE_OWNERSHIP: + return "Part ownership is not unique"; + case DB::DataPartRemovalState::NOT_REACHED_REMOVAL_TIME: + return "Part hasn't reached removal time yet"; + case DB::DataPartRemovalState::HAS_SKIPPED_MUTATION_PARENT: + return "Waiting mutation parent to be removed"; + case DB::DataPartRemovalState::REMOVED: + return "Part was selected to be removed"; + } +} + +} + namespace DB { @@ -92,6 +118,9 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"removal_csn", std::make_shared()}, 
{"has_lightweight_delete", std::make_shared()}, + + {"last_removal_attemp_time", std::make_shared()}, + {"removal_state", std::make_shared()}, } ) { @@ -310,6 +339,10 @@ void StorageSystemParts::processNextStorage( columns[res_index++]->insert(part->version.removal_csn.load(std::memory_order_relaxed)); if (columns_mask[src_index++]) columns[res_index++]->insert(part->hasLightweightDelete()); + if (columns_mask[src_index++]) + columns[res_index++]->insert(static_cast(part->last_removal_attemp_time.load(std::memory_order_relaxed))); + if (columns_mask[src_index++]) + columns[res_index++]->insert(getRemovalStateDescription(part->removal_state.load(std::memory_order_relaxed))); /// _state column should be the latest. /// Do not use part->getState*, it can be changed from different thread diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 43e31b8e4f4..2ada0fa3323 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -144,6 +144,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block else const_columns[i] = ColumnConst::create(columns[i]->cloneResized(1), 1); } + block.setColumns(const_columns); bool unmodified = true; @@ -163,6 +164,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block ActionsVisitor::Data visitor_data( context, SizeLimits{}, 1, source_columns, std::move(actions), prepared_sets, true, true, true, false, { aggregation_keys, grouping_set_keys, GroupByKind::NONE }); + ActionsVisitor(visitor_data).visit(node); actions = visitor_data.getActions(); auto expression_actions = std::make_shared(actions); diff --git a/src/Storages/tests/gtest_named_collections.cpp b/src/Storages/tests/gtest_named_collections.cpp index 369e8ec44f6..d5fe5010991 100644 --- a/src/Storages/tests/gtest_named_collections.cpp +++ b/src/Storages/tests/gtest_named_collections.cpp @@ -1,6 +1,6 @@ #include -#include -#include +#include +#include #include #include #include diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index 21c26062566..9d490105b17 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -64,6 +64,12 @@ public: /// This hint could be used not to repeat schema in function arguments. virtual void setStructureHint(const ColumnsDescription &) {} + /// Used for table functions that can use structure hint during INSERT INTO ... SELECT ... FROM table_function(...) + /// It returns possible virtual column names of corresponding storage. If select query contains + /// one of these columns, the structure from insertion table won't be used as a structure hint, + /// because we cannot determine which column from table correspond to this virtual column. + virtual std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const { return {}; } + virtual bool supportsReadingSubsetOfColumns() { return true; } /// Create storage according to the query. 
diff --git a/src/TableFunctions/TableFunctionFile.h b/src/TableFunctions/TableFunctionFile.h index 20ecdb6222c..797948cad03 100644 --- a/src/TableFunctions/TableFunctionFile.h +++ b/src/TableFunctions/TableFunctionFile.h @@ -22,6 +22,11 @@ public: ColumnsDescription getActualTableStructure(ContextPtr context) const override; + std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override + { + return {"_path", "_file"}; + } + protected: int fd = -1; void parseFirstArguments(const ASTPtr & arg, const ContextPtr & context) override; diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h index a391673e04d..c4c111de6e5 100644 --- a/src/TableFunctions/TableFunctionHDFS.h +++ b/src/TableFunctions/TableFunctionHDFS.h @@ -26,6 +26,11 @@ public: ColumnsDescription getActualTableStructure(ContextPtr context) const override; + std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override + { + return {"_path", "_file"}; + } + private: StoragePtr getStorage( const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 3b68a0766aa..23822486c29 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "registerTableFunctions.h" #include @@ -30,11 +31,9 @@ namespace ErrorCodes /// This is needed to avoid copy-pase. Because s3Cluster arguments only differ in additional argument (first) - cluster name void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & args, ContextPtr context, StorageS3Configuration & s3_configuration) { - if (auto named_collection = getURLBasedDataSourceConfiguration(args, context)) + if (auto named_collection = tryGetNamedCollectionWithOverrides(args)) { - auto [common_configuration, storage_specific_args] = named_collection.value(); - s3_configuration.set(common_configuration); - StorageS3::processNamedCollectionResult(s3_configuration, storage_specific_args); + StorageS3::processNamedCollectionResult(s3_configuration, *named_collection); } else { diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index b2eb03e8839..125238fa7db 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -32,6 +32,11 @@ public: bool supportsReadingSubsetOfColumns() override; + std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override + { + return {"_path", "_file"}; + } + protected: friend class TableFunctionS3Cluster; diff --git a/tests/ci/release.py b/tests/ci/release.py index 502efd79173..57d5c4cdd6e 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -32,8 +32,6 @@ from version_helper import ( RELEASE_READY_STATUS = "Ready for release" -git = Git() - class Repo: VALID = ("ssh", "https", "origin") @@ -79,7 +77,7 @@ class Release: self.release_commit = release_commit assert release_type in self.BIG + self.SMALL self.release_type = release_type - self._git = git + self._git = Git() self._version = get_version_from_repo(git=self._git) self._release_branch = "" self._rollback_stack = [] # type: List[str] diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index c02128d114f..b7f74c5aeb7 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -145,7 +145,7 @@ if __name__ == "__main__": ) logging.info("Going to run func 
tests: %s", run_command) - with TeePopen(run_command, run_log_path) as process: + with TeePopen(run_command, run_log_path, timeout=60 * 150) as process: retcode = process.wait() if retcode == 0: logging.info("Run successfully") diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index 61404847bff..b74069c16ab 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +from io import TextIOWrapper from subprocess import Popen, PIPE, STDOUT from threading import Thread from time import sleep @@ -14,15 +15,23 @@ import sys # it finishes. stderr and stdout will be redirected both to specified file and # stdout. class TeePopen: - # pylint: disable=W0102 - def __init__(self, command, log_file, env=os.environ.copy(), timeout=None): + def __init__( + self, + command: str, + log_file: str, + env: Optional[dict] = None, + timeout: Optional[int] = None, + ): self.command = command - self.log_file = log_file - self.env = env + self._log_file_name = log_file + self._log_file = None # type: Optional[TextIOWrapper] + self.env = env or os.environ.copy() self._process = None # type: Optional[Popen] self.timeout = timeout - def _check_timeout(self): + def _check_timeout(self) -> None: + if self.timeout is None: + return sleep(self.timeout) while self.process.poll() is None: logging.warning( @@ -33,7 +42,7 @@ class TeePopen: os.killpg(self.process.pid, 9) sleep(10) - def __enter__(self): + def __enter__(self) -> "TeePopen": self.process = Popen( self.command, shell=True, @@ -44,25 +53,21 @@ class TeePopen: stdout=PIPE, bufsize=1, ) - self.log_file = open(self.log_file, "w", encoding="utf-8") if self.timeout is not None and self.timeout > 0: t = Thread(target=self._check_timeout) t.daemon = True # does not block the program from exit t.start() return self - def __exit__(self, t, value, traceback): - for line in self.process.stdout: # type: ignore - sys.stdout.write(line) - self.log_file.write(line) - - self.process.wait() + def __exit__(self, exc_type, exc_value, traceback): + self.wait() self.log_file.close() def wait(self): - for line in self.process.stdout: # type: ignore - sys.stdout.write(line) - self.log_file.write(line) + if self.process.stdout is not None: + for line in self.process.stdout: + sys.stdout.write(line) + self.log_file.write(line) return self.process.wait() @@ -75,3 +80,9 @@ class TeePopen: @process.setter def process(self, process: Popen) -> None: self._process = process + + @property + def log_file(self) -> TextIOWrapper: + if self._log_file is None: + self._log_file = open(self._log_file_name, "w", encoding="utf-8") + return self._log_file diff --git a/tests/integration/test_create_query_constraints/__init__.py b/tests/integration/test_create_query_constraints/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_create_query_constraints/test.py b/tests/integration/test_create_query_constraints/test.py new file mode 100644 index 00000000000..997671a6996 --- /dev/null +++ b/tests/integration/test_create_query_constraints/test.py @@ -0,0 +1,92 @@ +import pytest +import asyncio +import re +import random +import os.path +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry, TSV + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance("instance") + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def 
test_create_query_const_constraints(): + + instance.query("CREATE USER u_const SETTINGS max_threads = 1 CONST") + instance.query("GRANT ALL ON *.* TO u_const") + + expected_error = "Setting max_threads should not be changed" + + assert expected_error in instance.query_and_get_error( + "CREATE USER inner_user SETTINGS max_threads = 1", user="u_const" + ) + assert expected_error in instance.query_and_get_error( + "CREATE USER inner_user SETTINGS max_threads MIN 0 MAX 2", user="u_const" + ) + assert expected_error in instance.query_and_get_error( + "CREATE USER inner_user SETTINGS max_threads WRITABLE", user="u_const" + ) + assert expected_error in instance.query_and_get_error( + "CREATE ROLE inner_role SETTINGS max_threads = 1", user="u_const" + ) + assert expected_error in instance.query_and_get_error( + "CREATE SETTINGS PROFILE inner_profile SETTINGS max_threads = 1", user="u_const" + ) + + instance.query( + "CREATE USER inner_user_1 SETTINGS max_threads CONST", user="u_const" + ) + instance.query( + "CREATE USER inner_user_2 SETTINGS max_threads = 1 CONST", user="u_const" + ) + instance.query("DROP USER u_const, inner_user_1, inner_user_2") + + +def test_create_query_minmax_constraints(): + + instance.query("CREATE USER u_minmax SETTINGS max_threads = 4 MIN 2 MAX 6") + instance.query("GRANT ALL ON *.* TO u_minmax") + + expected_error = "Setting max_threads shouldn't be less than" + + assert expected_error in instance.query_and_get_error( + "CREATE USER inner_user SETTINGS max_threads = 1", user="u_minmax" + ) + assert expected_error in instance.query_and_get_error( + "CREATE USER inner_user SETTINGS max_threads MIN 1 MAX 3", user="u_minmax" + ) + assert expected_error in instance.query_and_get_error( + "CREATE ROLE inner_role SETTINGS max_threads MIN 1 MAX 3", user="u_minmax" + ) + assert expected_error in instance.query_and_get_error( + "CREATE SETTINGS PROFILE inner_profile SETTINGS max_threads MIN 1 MAX 3", + user="u_minmax", + ) + + expected_error = "Setting max_threads shouldn't be greater than" + + assert expected_error in instance.query_and_get_error( + "CREATE USER inner_user SETTINGS max_threads = 8", user="u_minmax" + ) + assert expected_error in instance.query_and_get_error( + "CREATE USER inner_user SETTINGS max_threads MIN 4 MAX 8", user="u_minmax" + ) + assert expected_error in instance.query_and_get_error( + "CREATE ROLE inner_role SETTINGS max_threads MIN 4 MAX 8", user="u_minmax" + ) + assert expected_error in instance.query_and_get_error( + "CREATE SETTINGS PROFILE inner_profile SETTINGS max_threads MIN 4 MAX 8", + user="u_minmax", + ) + + instance.query("CREATE USER inner_user SETTINGS max_threads = 3", user="u_minmax") + instance.query("DROP USER u_minmax, inner_user") diff --git a/tests/integration/test_storage_s3/s3_mocks/echo.py b/tests/integration/test_storage_s3/s3_mocks/echo.py index 5103d7ebc15..7d04bb2f166 100644 --- a/tests/integration/test_storage_s3/s3_mocks/echo.py +++ b/tests/integration/test_storage_s3/s3_mocks/echo.py @@ -3,17 +3,20 @@ import sys class RequestHandler(http.server.BaseHTTPRequestHandler): - def do_HEAD(self): + def get_response(self): if self.path.startswith("/get-my-path/"): - self.send_response(200) - self.send_header("Content-Type", "text/plain") - self.end_headers() - + return b"/" + self.path.split("/", maxsplit=2)[2].encode() elif self.path == "/": + return b"OK" + + return None + + def do_HEAD(self): + if self.path.startswith("/get-my-path/") or self.path == "/": self.send_response(200) self.send_header("Content-Type", "text/plain") 
+ self.send_header("Content-Length", len(self.get_response())) self.end_headers() - else: self.send_response(404) self.send_header("Content-Type", "text/plain") @@ -21,11 +24,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): def do_GET(self): self.do_HEAD() - if self.path.startswith("/get-my-path/"): - self.wfile.write(b"/" + self.path.split("/", maxsplit=2)[2].encode()) - - elif self.path == "/": - self.wfile.write(b"OK") + self.wfile.write(self.get_response()) httpd = http.server.HTTPServer(("0.0.0.0", int(sys.argv[1])), RequestHandler) diff --git a/tests/integration/test_storage_s3/s3_mocks/mock_s3.py b/tests/integration/test_storage_s3/s3_mocks/mock_s3.py index 870353ebaa8..b31827e32bc 100644 --- a/tests/integration/test_storage_s3/s3_mocks/mock_s3.py +++ b/tests/integration/test_storage_s3/s3_mocks/mock_s3.py @@ -1,6 +1,6 @@ import sys -from bottle import abort, route, run, request, response +from bottle import route, run, request, response @route("/redirected/<_path:path>") @@ -14,14 +14,22 @@ def infinite_redirect(_path): def server(_bucket, _path): for name in request.headers: if name == "Authorization" and request.headers[name] == "Bearer TOKEN": - return "1, 2, 3" + result = "1, 2, 3" + response.content_type = "text/plain" + response.set_header("Content-Length", len(result)) + return result + + result = 'ForbiddenErrorForbidden Errortxfbd566d03042474888193-00608d7537' response.status = 403 response.content_type = "text/xml" - return 'ForbiddenErrorForbidden Errortxfbd566d03042474888193-00608d7537' + response.set_header("Content-Length", len(result)) + return result @route("/") def ping(): + response.content_type = "text/plain" + response.set_header("Content-Length", 2) return "OK" diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 6e1a6e8a66b..937f14bb878 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -806,7 +806,7 @@ def test_custom_auth_headers_exclusion(started_cluster): print(result) assert ei.value.returncode == 243 - assert "Forbidden Error" in ei.value.stderr + assert "HTTP response code: 403" in ei.value.stderr def test_infinite_redirect(started_cluster): @@ -1714,7 +1714,7 @@ def test_ast_auth_headers(started_cluster): f"select count() from s3('http://resolver:8080/{bucket}/{filename}', 'CSV')" ) - assert "Forbidden Error" in result + assert "HTTP response code: 403" in result assert "S3_ERROR" in result result = instance.query( diff --git a/tests/performance/direct_dictionary.xml b/tests/performance/direct_dictionary.xml index aa7b99e39de..a9472412364 100644 --- a/tests/performance/direct_dictionary.xml +++ b/tests/performance/direct_dictionary.xml @@ -129,10 +129,9 @@ FORMAT Null; - DROP TABLE IF EXISTS simple_key_direct_dictionary_source_table; - DROP TABLE IF EXISTS complex_key_direct_dictionary_source_table; - DROP DICTIONARY IF EXISTS simple_key_direct_dictionary; DROP DICTIONARY IF EXISTS complex_key_direct_dictionary; + DROP TABLE IF EXISTS simple_key_direct_dictionary_source_table; + DROP TABLE IF EXISTS complex_key_direct_dictionary_source_table; diff --git a/tests/performance/flat_dictionary.xml b/tests/performance/flat_dictionary.xml index 8957925ef1a..8e625179608 100644 --- a/tests/performance/flat_dictionary.xml +++ b/tests/performance/flat_dictionary.xml @@ -73,8 +73,7 @@ FORMAT Null; - DROP TABLE IF EXISTS simple_key_flat_dictionary_source_table - DROP DICTIONARY IF EXISTS simple_key_flat_dictionary + DROP TABLE IF EXISTS 
simple_key_flat_dictionary_source_table diff --git a/tests/performance/hashed_array_dictionary.xml b/tests/performance/hashed_array_dictionary.xml index 5d09d29a9e7..8a805f56371 100644 --- a/tests/performance/hashed_array_dictionary.xml +++ b/tests/performance/hashed_array_dictionary.xml @@ -129,10 +129,9 @@ FORMAT Null; - DROP TABLE IF EXISTS simple_key_hashed_array_dictionary_source_table; - DROP TABLE IF EXISTS complex_key_hashed_array_dictionary_source_table; - DROP DICTIONARY IF EXISTS simple_key_hashed_array_dictionary; DROP DICTIONARY IF EXISTS complex_key_hashed_array_dictionary; + DROP TABLE IF EXISTS simple_key_hashed_array_dictionary_source_table; + DROP TABLE IF EXISTS complex_key_hashed_array_dictionary_source_table; diff --git a/tests/performance/hashed_dictionary.xml b/tests/performance/hashed_dictionary.xml index cf1cdac6df1..e9038e694c6 100644 --- a/tests/performance/hashed_dictionary.xml +++ b/tests/performance/hashed_dictionary.xml @@ -129,10 +129,9 @@ FORMAT Null; - DROP TABLE IF EXISTS simple_key_hashed_dictionary_source_table; - DROP TABLE IF EXISTS complex_key_hashed_dictionary_source_table; - DROP DICTIONARY IF EXISTS simple_key_hashed_dictionary; DROP DICTIONARY IF EXISTS complex_key_hashed_dictionary; + DROP TABLE IF EXISTS simple_key_hashed_dictionary_source_table; + DROP TABLE IF EXISTS complex_key_hashed_dictionary_source_table; diff --git a/tests/performance/hierarchical_dictionaries.xml b/tests/performance/hierarchical_dictionaries.xml index 7d807bf2c8d..b1c385eedb5 100644 --- a/tests/performance/hierarchical_dictionaries.xml +++ b/tests/performance/hierarchical_dictionaries.xml @@ -68,8 +68,7 @@ SELECT {func}('hierarchical_{dictionary_layout}_dictionary', number + 1) FROM numbers(1000000) FORMAT Null; - DROP TABLE IF EXISTS hierarchical_dictionary_source_table; DROP DICTIONARY IF EXISTS hierarchical_{dictionary_layout}_dictionary; DROP DICTIONARY IF EXISTS hierarchical_flat_dictionary; - + DROP TABLE IF EXISTS hierarchical_dictionary_source_table; diff --git a/tests/performance/range_hashed_dictionary.xml b/tests/performance/range_hashed_dictionary.xml index bdf949cd1ff..2ee559cbdaa 100644 --- a/tests/performance/range_hashed_dictionary.xml +++ b/tests/performance/range_hashed_dictionary.xml @@ -117,10 +117,9 @@ FORMAT Null; - DROP TABLE IF EXISTS simple_key_range_hashed_dictionary_source_table; - DROP TABLE IF EXISTS complex_key_range_hashed_dictionary_source_table; - DROP DICTIONARY IF EXISTS simple_key_range_hashed_dictionary; DROP DICTIONARY IF EXISTS complex_key_range_hashed_dictionary; + DROP TABLE IF EXISTS simple_key_range_hashed_dictionary_source_table; + DROP TABLE IF EXISTS complex_key_range_hashed_dictionary_source_table; diff --git a/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh b/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh index 3e2eda96f93..ccc4ed3e08d 100755 --- a/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh +++ b/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh @@ -12,7 +12,7 @@ DROP TABLE IF EXISTS wv; CREATE TABLE dst(time DateTime, colA String, colB String) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(colA String, colB String) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst AS SELECT tumbleStart(w_id) AS time, colA, colB FROM mt GROUP BY tumble(now(), INTERVAL '1' SECOND, 'US/Samoa') AS w_id, colA, colB; +CREATE WINDOW VIEW wv TO dst AS SELECT tumbleStart(w_id) AS time, colA, colB FROM mt GROUP BY tumble(now(), 
 
 INSERT INTO mt VALUES ('test1', 'test2');
 EOF
diff --git a/tests/queries/0_stateless/01195_formats_diagnostic_info.reference b/tests/queries/0_stateless/01195_formats_diagnostic_info.reference
index 139926db671..0d4705dc9bf 100644
--- a/tests/queries/0_stateless/01195_formats_diagnostic_info.reference
+++ b/tests/queries/0_stateless/01195_formats_diagnostic_info.reference
@@ -27,7 +27,6 @@ Column 2, name: d, type: Decimal(18, 10), parsed text: "123456789"ERROR
 ERROR: There is no delimiter between fields: expected "<TAB>", got "7<TAB>Hello<TAB>123"
 ERROR: There is no delimiter after last field: expected "<LINE FEED>", got "<TAB>1"
 ERROR: There is no delimiter after last field: expected "<LINE FEED>", got "Hello"
-Column 0, name: t, type: DateTime, ERROR: text "<LINE FEED>" is not like DateTime
 JSONCompactEachRow
 Column 2, name: d, type: Decimal(18, 10), parsed text: "123456789"ERROR
 Column 0, name: t, type: DateTime, parsed text: "2020-04-21 12:34:56"ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format.
diff --git a/tests/queries/0_stateless/01195_formats_diagnostic_info.sh b/tests/queries/0_stateless/01195_formats_diagnostic_info.sh
index dde410d95c4..a6b3ebf4087 100755
--- a/tests/queries/0_stateless/01195_formats_diagnostic_info.sh
+++ b/tests/queries/0_stateless/01195_formats_diagnostic_info.sh
@@ -37,7 +37,6 @@ echo -e '2020-04-21 12:34:56\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ERR
 echo -e '2020-04-21 12:34:567\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ERROR"
 echo -e '2020-04-21 12:34:56\tHello\t12345678\t1' | "${PARSER[@]}" 2>&1| grep "ERROR"
 echo -e '2020-04-21 12:34:56\t\t123Hello' | "${PARSER[@]}" 2>&1| grep "ERROR"
-echo -e '2020-04-21 12:34:56\tHello\t12345678\n' | "${PARSER[@]}" 2>&1| grep "ERROR"
 
 PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format JSONCompactEachRow)
 echo '["2020-04-21 12:34:56", "Hello", 12345678]' | "${PARSER[@]}" 2>&1| grep "ERROR" || echo "JSONCompactEachRow"
diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference
index 17102b47c1a..c866f3e7b52 100644
--- a/tests/queries/0_stateless/02117_show_create_table_system.reference
+++ b/tests/queries/0_stateless/02117_show_create_table_system.reference
@@ -504,6 +504,8 @@ CREATE TABLE system.parts
     `creation_csn` UInt64,
     `removal_csn` UInt64,
     `has_lightweight_delete` UInt8,
+    `last_removal_attemp_time` DateTime,
+    `removal_state` String,
     `bytes` UInt64,
     `marks_size` UInt64
 )
diff --git a/tests/queries/0_stateless/02270_errors_in_files_s3.reference b/tests/queries/0_stateless/02270_errors_in_files_s3.reference
index bf106b8562b..49515a81c8b 100644
--- a/tests/queries/0_stateless/02270_errors_in_files_s3.reference
+++ b/tests/queries/0_stateless/02270_errors_in_files_s3.reference
@@ -1,2 +1,2 @@
 http://localhost:11111/test/tsv_with_header.tsv
-test/tsv_with_header.tsv
+tsv_with_header.tsv
diff --git a/tests/queries/0_stateless/02270_errors_in_files_s3.sh b/tests/queries/0_stateless/02270_errors_in_files_s3.sh
index 8d8c6ba56ef..e98a0cc6ae0 100755
--- a/tests/queries/0_stateless/02270_errors_in_files_s3.sh
+++ b/tests/queries/0_stateless/02270_errors_in_files_s3.sh
@@ -6,4 +6,4 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CUR_DIR"/../shell_config.sh
"$CUR_DIR"/../shell_config.sh ${CLICKHOUSE_CLIENT} --query "SELECT * FROM url('http://localhost:11111/test/{a,tsv_with_header}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64')" 2>&1 | grep -o -m1 "http://localhost:11111/test/tsv_with_header.tsv" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM s3('http://localhost:11111/test/{a,tsv_with_header}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64')" 2>&1 | grep -o -m1 "test/tsv_with_header.tsv" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM s3('http://localhost:11111/test/{a,tsv_with_header}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64')" 2>&1 | grep -o -m1 "tsv_with_header.tsv" diff --git a/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.reference b/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.reference new file mode 100644 index 00000000000..5bad96d521c --- /dev/null +++ b/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.reference @@ -0,0 +1,7 @@ +1 2 +3 4 +1 2 +3 4 +ab|c de&f +ab|c de*f gh&k +|av *ad &ad diff --git a/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.sh b/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.sh new file mode 100755 index 00000000000..ec986a4d16e --- /dev/null +++ b/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo "1||2&&3||4&&" | $CLICKHOUSE_LOCAL --input-format=CustomSeparated --format_custom_field_delimiter='||' --format_custom_row_after_delimiter='&&' --format_custom_escaping_rule='CSV' -q "select * from table" + +echo "1||2|||3||4|||" | $CLICKHOUSE_LOCAL --input-format=CustomSeparated --format_custom_field_delimiter='||' --format_custom_row_after_delimiter='|||' --format_custom_escaping_rule='CSV' -q "select * from table" + +echo "ab|c||de&f&&" | $CLICKHOUSE_LOCAL --input-format=CustomSeparated --format_custom_field_delimiter='||' --format_custom_row_after_delimiter='&&' --format_custom_escaping_rule='CSV' -q "select * from table" + +echo -e "\${column_1:CSV}||\${column_2:CSV}**\${column_3:CSV}&&" > row_format_02481 + +echo -e "ab|c||de*f**gh&k&&\n|av||*ad**&ad&&" | $CLICKHOUSE_LOCAL -q "select * from table" --input-format=Template --format_template_row='row_format_02481' --format_template_rows_between_delimiter "" + +rm row_format_02481 + diff --git a/tests/queries/0_stateless/02483_check_virtuals_shile_using_structure_from_insertion_table.reference b/tests/queries/0_stateless/02483_check_virtuals_shile_using_structure_from_insertion_table.reference new file mode 100644 index 00000000000..88d21f535eb --- /dev/null +++ b/tests/queries/0_stateless/02483_check_virtuals_shile_using_structure_from_insertion_table.reference @@ -0,0 +1 @@ +Hello 02483_data.LineAsString diff --git a/tests/queries/0_stateless/02483_check_virtuals_shile_using_structure_from_insertion_table.sql b/tests/queries/0_stateless/02483_check_virtuals_shile_using_structure_from_insertion_table.sql new file mode 100644 index 00000000000..2a0e5e7495b --- /dev/null +++ b/tests/queries/0_stateless/02483_check_virtuals_shile_using_structure_from_insertion_table.sql @@ -0,0 +1,9 @@ +-- Tags: no-parallel + +drop table if exists test; +create table test (line String, _file String, _path String) engine=Memory; +insert into function file(02483_data.LineAsString) select 'Hello' settings 
+set use_structure_from_insertion_table_in_table_functions=2;
+insert into test select *, _file, _path from file(02483_data.LineAsString);
+select line, _file from test;
+drop table test;
diff --git a/tests/queries/0_stateless/02495_s3_filter_by_file.reference b/tests/queries/0_stateless/02495_s3_filter_by_file.reference
new file mode 100644
index 00000000000..7cac31e2d5c
--- /dev/null
+++ b/tests/queries/0_stateless/02495_s3_filter_by_file.reference
@@ -0,0 +1,2 @@
+5 5 9
+1 test_02495_1
diff --git a/tests/queries/0_stateless/02495_s3_filter_by_file.sql b/tests/queries/0_stateless/02495_s3_filter_by_file.sql
new file mode 100644
index 00000000000..8d6d8a8a5a4
--- /dev/null
+++ b/tests/queries/0_stateless/02495_s3_filter_by_file.sql
@@ -0,0 +1,22 @@
+-- Tags: no-parallel, no-fasttest
+
+DROP TABLE IF EXISTS t_s3_filter_02495;
+
+CREATE TABLE t_s3_filter_02495 (a UInt64)
+ENGINE = S3(s3_conn, filename = 'test_02495_{_partition_id}', format = Parquet)
+PARTITION BY a;
+
+INSERT INTO t_s3_filter_02495 SELECT number FROM numbers(10) SETTINGS s3_truncate_on_insert=1;
+
+SET max_rows_to_read = 5;
+
+WITH splitByChar('_', _file)[3]::UInt64 AS num
+SELECT count(), min(num), max(num)
+FROM s3(s3_conn, filename = 'test_02495_*', format = Parquet)
+WHERE num >= 5;
+
+SELECT *, _file
+FROM s3(s3_conn, filename = 'test_02495_1', format = Parquet)
+WHERE _file = 'test_02495_1';
+
+DROP TABLE t_s3_filter_02495;
diff --git a/tests/queries/0_stateless/02496_storage_s3_profile_events.reference b/tests/queries/0_stateless/02496_storage_s3_profile_events.reference
new file mode 100644
index 00000000000..df850b51d6e
--- /dev/null
+++ b/tests/queries/0_stateless/02496_storage_s3_profile_events.reference
@@ -0,0 +1,2 @@
+10
+0 1 10 1
diff --git a/tests/queries/0_stateless/02496_storage_s3_profile_events.sql b/tests/queries/0_stateless/02496_storage_s3_profile_events.sql
new file mode 100644
index 00000000000..6ca4c3370d5
--- /dev/null
+++ b/tests/queries/0_stateless/02496_storage_s3_profile_events.sql
@@ -0,0 +1,23 @@
+-- Tags: no-parallel, no-fasttest, no-random-settings
+
+DROP TABLE IF EXISTS t_s3_events_02496;
+
+CREATE TABLE t_s3_events_02496 (a UInt64)
+ENGINE = S3(s3_conn, filename = 'test_02496_{_partition_id}', format = Parquet)
+PARTITION BY a;
+
+INSERT INTO t_s3_events_02496 SELECT number FROM numbers(10) SETTINGS s3_truncate_on_insert=1;
+
+SET max_threads = 1;
+SELECT count() FROM s3(s3_conn, filename = 'test_02496_*', format = Parquet, structure = 'a UInt64');
+SYSTEM FLUSH LOGS;
+
+SELECT
+    ProfileEvents['S3HeadObject'],
+    ProfileEvents['S3ListObjects'],
+    ProfileEvents['RemoteFSPrefetches'],
+    ProfileEvents['IOBufferAllocBytes'] < 100000
+FROM system.query_log WHERE current_database = currentDatabase()
+AND type = 'QueryFinish' AND query ILIKE 'SELECT count() FROM s3%test_02496%';
+
+DROP TABLE t_s3_events_02496;
diff --git a/tests/queries/0_stateless/02500_analyzer_storage_view_crash_fix.reference b/tests/queries/0_stateless/02500_analyzer_storage_view_crash_fix.reference
new file mode 100644
index 00000000000..bfde072a796
--- /dev/null
+++ b/tests/queries/0_stateless/02500_analyzer_storage_view_crash_fix.reference
@@ -0,0 +1 @@
+2 2
diff --git a/tests/queries/0_stateless/02500_analyzer_storage_view_crash_fix.sql b/tests/queries/0_stateless/02500_analyzer_storage_view_crash_fix.sql
new file mode 100644
index 00000000000..8f4d14b95cc
--- /dev/null
+++ b/tests/queries/0_stateless/02500_analyzer_storage_view_crash_fix.sql
@@ -0,0 +1,19 @@
+SET allow_experimental_analyzer = 1;
+
+DROP TABLE IF EXISTS test_table;
+CREATE TABLE test_table
+(
+    f1 Int32,
+    f2 Int32,
+    pk Int32
+) ENGINE = MergeTree PARTITION BY pk ORDER BY f1;
+
+INSERT INTO test_table SELECT number, number, number FROM numbers(10);
+
+DROP VIEW IF EXISTS test_view;
+CREATE VIEW test_view AS SELECT f1, f2 FROM test_table WHERE pk = 2;
+
+SELECT * FROM test_view;
+
+DROP VIEW test_view;
+DROP TABLE test_table;
diff --git a/tests/queries/0_stateless/02502_fuzz_bad_cast_to_ast_literal.reference b/tests/queries/0_stateless/02502_fuzz_bad_cast_to_ast_literal.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02502_fuzz_bad_cast_to_ast_literal.sql b/tests/queries/0_stateless/02502_fuzz_bad_cast_to_ast_literal.sql
new file mode 100644
index 00000000000..3db59d9a38f
--- /dev/null
+++ b/tests/queries/0_stateless/02502_fuzz_bad_cast_to_ast_literal.sql
@@ -0,0 +1,7 @@
+SET allow_deprecated_syntax_for_merge_tree=1;
+DROP TABLE IF EXISTS test54378;
+CREATE TABLE test54378 (`part_date` Date, `pk_date` Date, `date` Date) ENGINE = MergeTree(part_date, pk_date, 8192);
+INSERT INTO test54378 values ('2018-04-19', '2018-04-19', '2018-04-19');
+SELECT 232 FROM test54378 PREWHERE (part_date = (SELECT toDate('2018-04-19'))) IN (SELECT toDate('2018-04-19')) GROUP BY toDate(toDate(-2147483649, NULL), NULL), -inf;
+DROP TABLE test54378;
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 23aa0d1dbaf..d4c5924d6f3 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,9 +1,11 @@
+v22.12.1.1752-stable 2022-12-15
 v22.11.2.30-stable 2022-12-02
 v22.11.1.1360-stable 2022-11-17
 v22.10.4.23-stable 2022-12-02
 v22.10.3.27-stable 2022-11-23
 v22.10.2.11-stable 2022-11-01
 v22.10.1.1877-stable 2022-10-26
+v22.9.7.34-stable 2022-12-16
 v22.9.6.20-stable 2022-12-02
 v22.9.5.25-stable 2022-11-23
 v22.9.4.32-stable 2022-10-26