diff --git a/SECURITY.md b/SECURITY.md
index a4f431d7552..3dcdc5db009 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with security updates:
 | Version | Supported |
 |:-|:-|
+| 22.12 | ✔️ |
 | 22.11 | ✔️ |
 | 22.10 | ✔️ |
-| 22.9 | ✔️ |
+| 22.9 | ❌ |
 | 22.8 | ✔️ |
 | 22.7 | ❌ |
 | 22.6 | ❌ |
diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt
index d06d3918612..87b11c46f45 100644
--- a/cmake/autogenerated_versions.txt
+++ b/cmake/autogenerated_versions.txt
@@ -2,11 +2,11 @@
 # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
 # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
-SET(VERSION_REVISION 54469)
+SET(VERSION_REVISION 54470)
 SET(VERSION_MAJOR 22)
-SET(VERSION_MINOR 12)
+SET(VERSION_MINOR 13)
 SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH 0d211ed19849fe44b0e43fdebe2c15d76d560a77)
-SET(VERSION_DESCRIBE v22.12.1.1-testing)
-SET(VERSION_STRING 22.12.1.1)
+SET(VERSION_GITHASH 688e488e930c83eefeac4f87c4cc029cc5b231e3)
+SET(VERSION_DESCRIBE v22.13.1.1-testing)
+SET(VERSION_STRING 22.13.1.1)
 # end of autochange
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 305fc279414..22d6282d71c 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="22.11.2.30"
+ARG VERSION="22.12.1.1752"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 # user/group precreated explicitly with fixed uid/gid on purpose.
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index f1c4dd097aa..3135ec508de 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="22.11.2.30"
+ARG VERSION="22.12.1.1752"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 # set non-empty deb_location_url url to create a docker image
diff --git a/docs/changelogs/v22.12.1.1752-stable.md b/docs/changelogs/v22.12.1.1752-stable.md
new file mode 100644
index 00000000000..9b3d2379277
--- /dev/null
+++ b/docs/changelogs/v22.12.1.1752-stable.md
@@ -0,0 +1,320 @@
+---
+sidebar_position: 1
+sidebar_label: 2022
+---
+
+# 2022 Changelog
+
+### ClickHouse release v22.12.1.1752-stable (688e488e930) FIXME as compared to v22.11.1.1360-stable (0d211ed1984)
+
+#### Backward Incompatible Change
+* Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with a `String` argument. The incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/41431 and affects the 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 respectively). Some minor releases of the 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7; we recommend upgrading from 22.7.* to 22.8.10 or newer). This release note does not concern users who have never used the affected versions. Incompatible versions append an extra `'\0'` to strings when reading states of the aggregate functions mentioned above.
+For example, if an older version saved the state of `anyState('foobar')` to `state_column`, then an incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also, incompatible versions write states of the aggregate functions without a trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions, including incompatible versions, except for one corner case. If an incompatible version saved a state with a string that actually ends with a null character, then newer versions will trim the trailing `'\0'` when reading the state of the affected aggregate function. For example, if an incompatible version saved the state of `anyState('abrac\0dabra\0')` to `state_column`, then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Raúl Marín](https://github.com/Algunenano)).
+
+#### New Feature
+* Add "grace_hash" join_algorithm. [#38191](https://github.com/ClickHouse/ClickHouse/pull/38191) ([BigRedEye](https://github.com/BigRedEye)).
+* Merging on the initiator now uses the same memory-bound approach as merging of local aggregation results if `enable_memory_bound_merging_of_aggregation_results` is set. [#40879](https://github.com/ClickHouse/ClickHouse/pull/40879) ([Nikita Taranov](https://github.com/nickitat)).
+* Add BSONEachRow input/output format. In this format, ClickHouse formats/parses each row as a separate BSON document, and each column is formatted/parsed as a single BSON field with the column name as a key. [#42033](https://github.com/ClickHouse/ClickHouse/pull/42033) ([mark-polokhov](https://github.com/mark-polokhov)).
+* Closes: [#37631](https://github.com/ClickHouse/ClickHouse/issues/37631). [#42265](https://github.com/ClickHouse/ClickHouse/pull/42265) ([刘陶峰](https://github.com/taofengliu)).
+* Added `multiplyDecimal` and `divideDecimal` functions for decimal operations with fixed precision. [#42438](https://github.com/ClickHouse/ClickHouse/pull/42438) ([Andrey Zvonov](https://github.com/zvonand)).
+* Added `system.moves` table with a list of currently moving parts. [#42660](https://github.com/ClickHouse/ClickHouse/pull/42660) ([Sergei Trifonov](https://github.com/serxa)).
+* Keeper feature: add support for an embedded Prometheus endpoint. [#43087](https://github.com/ClickHouse/ClickHouse/pull/43087) ([Antonio Andelic](https://github.com/antonio2368)).
+* Added the `age` function to calculate the difference between two dates (or dates with time) expressed as a number of full units. Closes [#41115](https://github.com/ClickHouse/ClickHouse/issues/41115). [#43123](https://github.com/ClickHouse/ClickHouse/pull/43123) ([Roman Vasin](https://github.com/rvasin)).
+* Add settings `max_streams_for_merge_tree_reading` and `allow_asynchronous_read_from_io_pool_for_merge_tree`. Setting `max_streams_for_merge_tree_reading` limits the number of reading streams for MergeTree tables. Setting `allow_asynchronous_read_from_io_pool_for_merge_tree` enables a background I/O pool to read from `MergeTree` tables. This may increase performance for I/O-bound queries if used together with `max_streams_to_max_threads_ratio` or `max_streams_for_merge_tree_reading`. [#43260](https://github.com/ClickHouse/ClickHouse/pull/43260) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Add the index expression to the `data_skipping_indices` system table. [#43308](https://github.com/ClickHouse/ClickHouse/pull/43308) ([Guillaume Tassery](https://github.com/YiuRULE)).
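+
+For instance, a quick way to see the new information (a sketch, not from the PR; it assumes the expression is exposed as the `expr` column alongside the existing columns of `system.data_skipping_indices`):
+
+```sql
+-- List skipping indexes together with the expressions they are built over:
+SELECT table, name, type, expr
+FROM system.data_skipping_indices
+WHERE database = currentDatabase();
+```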
+* New hash function [xxh3](https://github.com/Cyan4973/xxHash) added. Also, performance of `xxHash32` and `xxHash64` improved on ARM thanks to a library update. [#43411](https://github.com/ClickHouse/ClickHouse/pull/43411) ([Nikita Taranov](https://github.com/nickitat)).
+* Temporary data (for external sorting, aggregation, and JOINs) can share storage with the filesystem cache for remote disks and evict it. Closes [#42158](https://github.com/ClickHouse/ClickHouse/issues/42158). [#43457](https://github.com/ClickHouse/ClickHouse/pull/43457) ([Vladimir C](https://github.com/vdimir)).
+* Add column `engine_full` to system table `databases` so that users can access the whole engine definition of a database via system tables. [#43468](https://github.com/ClickHouse/ClickHouse/pull/43468) ([凌涛](https://github.com/lingtaolf)).
+* Add password complexity rules and checks for creating a new user. [#43719](https://github.com/ClickHouse/ClickHouse/pull/43719) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Add function `concatWithSeparator`, similar to `concat_ws` in Spark. [#43749](https://github.com/ClickHouse/ClickHouse/pull/43749) ([李扬](https://github.com/taiyang-li)).
+* Added constraints for merge tree settings. [#43903](https://github.com/ClickHouse/ClickHouse/pull/43903) ([Sergei Trifonov](https://github.com/serxa)).
+* Support numeric literals with `_` as a separator. [#43925](https://github.com/ClickHouse/ClickHouse/pull/43925) ([jh0x](https://github.com/jh0x)).
+* Add a new setting `input_format_json_read_objects_as_strings` that allows parsing nested JSON objects into Strings in all JSON input formats. This setting is disabled by default. [#44052](https://github.com/ClickHouse/ClickHouse/pull/44052) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### Performance Improvement
+* The optimization is now skipped if `max_size_to_preallocate_for_aggregation` has too small a value. The default value of this setting was increased to `10^8`. [#43945](https://github.com/ClickHouse/ClickHouse/pull/43945) ([Nikita Taranov](https://github.com/nickitat)).
+
+#### Improvement
+* Support numeric literals with underscores. Closes [#28967](https://github.com/ClickHouse/ClickHouse/issues/28967). [#39129](https://github.com/ClickHouse/ClickHouse/pull/39129) ([unbyte](https://github.com/unbyte)).
+* Add `FROM table SELECT column` syntax. [#41095](https://github.com/ClickHouse/ClickHouse/pull/41095) ([Nikolay Degterinsky](https://github.com/evillique)).
+* This PR changes how the following queries delete parts: TRUNCATE TABLE, ALTER TABLE DROP PART, ALTER TABLE DROP PARTITION. Now these queries make empty parts that cover the old parts. This makes TRUNCATE work without an exclusive lock, so concurrent reads aren't blocked. Durability is also achieved in all these queries: if the request succeeds, no resurrected parts appear later. Note that atomicity is achieved only within the transaction scope. [#41145](https://github.com/ClickHouse/ClickHouse/pull/41145) ([Sema Checherinda](https://github.com/CheSema)).
+* The `SET param_x` query no longer requires manual string serialization for the value of the parameter. For example, the query `SET param_a = '[\'a\', \'b\']'` can now be written as `SET param_a = ['a', 'b']`. [#41874](https://github.com/ClickHouse/ClickHouse/pull/41874) ([Nikolay Degterinsky](https://github.com/evillique)).
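+
+A minimal illustration of the new syntax (hypothetical session; the parameter name and values are made up):
+
+```sql
+-- Previously the value had to be passed as a serialized string:
+SET param_a = '[\'a\', \'b\']';
+-- Now the literal can be written directly:
+SET param_a = ['a', 'b'];
+SELECT {a:Array(String)} AS arr;  -- returns ['a','b']
+```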
+* `filesystemAvailable` and related functions support one optional argument with a disk name, and `filesystemFree` was changed to `filesystemUnreserved`. Closes [#35076](https://github.com/ClickHouse/ClickHouse/issues/35076). [#42064](https://github.com/ClickHouse/ClickHouse/pull/42064) ([flynn](https://github.com/ucasfl)).
+* Increased the default value of search_limit to 256, and added an LDAP server config option to change it to an arbitrary value. Closes: [#42276](https://github.com/ClickHouse/ClickHouse/issues/42276). [#42461](https://github.com/ClickHouse/ClickHouse/pull/42461) ([Vasily Nemkov](https://github.com/Enmk)).
+* Add cosine distance for Annoy. [#42778](https://github.com/ClickHouse/ClickHouse/pull/42778) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* Allow removing sensitive information from exception messages as well. Resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#42940](https://github.com/ClickHouse/ClickHouse/pull/42940) ([filimonov](https://github.com/filimonov)).
+* Keeper improvement: add the 4lw command `rqld`, which can manually assign a node as leader. [#43026](https://github.com/ClickHouse/ClickHouse/pull/43026) ([JackyWoo](https://github.com/JackyWoo)).
+* Apply connection timeout settings for Distributed async INSERT from the query. [#43156](https://github.com/ClickHouse/ClickHouse/pull/43156) ([Azat Khuzhin](https://github.com/azat)).
+* The `unhex` function supports FixedString arguments. [Issue #42369](https://github.com/ClickHouse/ClickHouse/issues/42369). [#43207](https://github.com/ClickHouse/ClickHouse/pull/43207) ([DR](https://github.com/freedomDR)).
+* Priority is given to deleting completely expired parts; related to [#42869](https://github.com/ClickHouse/ClickHouse/issues/42869). [#43222](https://github.com/ClickHouse/ClickHouse/pull/43222) ([zhongyuankai](https://github.com/zhongyuankai)).
+* Follow-up to https://github.com/ClickHouse/ClickHouse/pull/42484. Mask sensitive information in logs better; mask secret parts in the output of the queries `SHOW CREATE TABLE` and `SELECT FROM system.tables`. Also resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#43227](https://github.com/ClickHouse/ClickHouse/pull/43227) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Enable compression of marks and primary key. [#43288](https://github.com/ClickHouse/ClickHouse/pull/43288) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Resolves issue [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075). Previously, async insert didn't support deduplication because multiple small inserts coexist in one part, which corresponds to multiple `block_id`s. The solution is straightforward. The change involves: 1. mark offsets for every insert in every chunk; 2. calculate multiple `block_id`s when the sinker receives a chunk; 3. get the block number lock by these `block_id`s: 3.1. if it fails, remove the duplicate insert(s) and duplicate `block_id`(s) from the block and recalculate `offsets` again; 3.2. if it succeeds, commit the `block_id`s and other items into Keeper: a. if that fails, do 3.1; b. if it succeeds, everything succeeds. [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304) ([Han Fei](https://github.com/hanfei1991)).
+* More precise and reactive CPU load indication on the client. [#43307](https://github.com/ClickHouse/ClickHouse/pull/43307) ([Sergei Trifonov](https://github.com/serxa)).
+* Restrict default access to named collections for users defined in the config. A user must have an explicit `show_named_collections=1` to be able to see them. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Support reading of subcolumns of nested types from storage `S3` and table function `s3` with formats `Parquet`, `Arrow` and `ORC`. [#43329](https://github.com/ClickHouse/ClickHouse/pull/43329) ([chen](https://github.com/xiedeyantu)).
+* Systemd integration now correctly notifies systemd that the service has really started and is ready to serve requests. [#43400](https://github.com/ClickHouse/ClickHouse/pull/43400) ([Коренберг Марк](https://github.com/socketpair)).
+* Add `table_uuid` to `system.parts`. [#43404](https://github.com/ClickHouse/ClickHouse/pull/43404) ([Azat Khuzhin](https://github.com/azat)).
+* Added a client option to display the number of locally processed rows in non-interactive mode (--print-num-processed-rows). [#43407](https://github.com/ClickHouse/ClickHouse/pull/43407) ([jh0x](https://github.com/jh0x)).
+* Show read rows while reading from stdin in the client. Closes [#43423](https://github.com/ClickHouse/ClickHouse/issues/43423). [#43442](https://github.com/ClickHouse/ClickHouse/pull/43442) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Keeper improvement: try syncing logs to disk in parallel with replication. [#43450](https://github.com/ClickHouse/ClickHouse/pull/43450) ([Antonio Andelic](https://github.com/antonio2368)).
+* Show a progress bar while reading from the s3 table function / engine. [#43454](https://github.com/ClickHouse/ClickHouse/pull/43454) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* The progress bar will show both read and written rows. [#43496](https://github.com/ClickHouse/ClickHouse/pull/43496) ([Ilya Yatsishin](https://github.com/qoega)).
+* Implement the `aggregation-in-order` optimization on top of the query plan. It is enabled by default (but works only together with `optimize_aggregation_in_order`, which is disabled by default). Set `query_plan_aggregation_in_order = 0` to use the previous AST-based version. [#43592](https://github.com/ClickHouse/ClickHouse/pull/43592) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Allow sending profile events with `trace_type = 'ProfileEvent'` to `system.trace_log` on each increment, with the current stack, the profile event name and the value of the increment. It can be enabled with the setting `trace_profile_events` and used to debug the performance of queries. [#43639](https://github.com/ClickHouse/ClickHouse/pull/43639) ([Anton Popov](https://github.com/CurtizJ)).
+* Keeper improvement: requests are batched more often. The batching can be controlled with the new setting `max_requests_quick_batch_size`. [#43686](https://github.com/ClickHouse/ClickHouse/pull/43686) ([Antonio Andelic](https://github.com/antonio2368)).
+* Added the possibility to use an array as the second parameter of the `cutURLParameter` function. Closes [#6827](https://github.com/ClickHouse/ClickHouse/issues/6827). [#43788](https://github.com/ClickHouse/ClickHouse/pull/43788) ([Roman Vasin](https://github.com/rvasin)).
+* Implement referential dependencies and use them to create tables in the correct order while restoring from a backup. [#43834](https://github.com/ClickHouse/ClickHouse/pull/43834) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Add a new setting `input_format_max_binary_string_size` to limit string size in the RowBinary format. [#43842](https://github.com/ClickHouse/ClickHouse/pull/43842) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix some incorrect logic related to AST-level optimization. [#43873](https://github.com/ClickHouse/ClickHouse/pull/43873) ([Duc Canh Le](https://github.com/canhld94)).
+* Support queries like `SHOW FULL TABLES ...`. [#43910](https://github.com/ClickHouse/ClickHouse/pull/43910) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* When ClickHouse requests a remote HTTP server, and it returns an error, the numeric HTTP code was not displayed correctly in the exception message. Closes [#43919](https://github.com/ClickHouse/ClickHouse/issues/43919). [#43920](https://github.com/ClickHouse/ClickHouse/pull/43920) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem/merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem` did not respect adaptive granularity. Fat rows did not decrease the number of read rows (as was done for `merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read`), which could lead to high memory usage. [#43965](https://github.com/ClickHouse/ClickHouse/pull/43965) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Support `optimize_if_transform_strings_to_enum` in the new analyzer. [#43999](https://github.com/ClickHouse/ClickHouse/pull/43999) ([Antonio Andelic](https://github.com/antonio2368)).
+* This upgrades the new "DeflateQpl" compression codec, which was implemented in a previous PR (details: https://github.com/ClickHouse/ClickHouse/pull/39494). This patch improves the codec in the following aspects: 1. upgrade from QPL v0.2.0 to QPL v0.3.0 ([Intel® Query Processing Library (QPL)](https://github.com/intel/qpl)); 2. improve the CMake file to fix QPL build issues for QPL v0.3.0; 3. link the QPL library with libaccel-config at build time instead of runtime loading as in QPL v0.2.0 (dlopen); 4. fixed a log print issue in CompressionCodecDeflateQpl.cpp. [#44024](https://github.com/ClickHouse/ClickHouse/pull/44024) ([jasperzhu](https://github.com/jinjunzh)).
+* Follow-up to https://github.com/ClickHouse/ClickHouse/pull/43834. Fixes review issues; dependencies from the `Distributed` table engine and from the `cluster()` function are now also considered, as well as dependencies of a dictionary defined without host & port specified. [#44158](https://github.com/ClickHouse/ClickHouse/pull/44158) ([Vitaly Baranov](https://github.com/vitlibar)).
+
+#### Bug Fix
+* Fix mutations not making progress when checksums do not match between replicas (e.g. caused by a change in data format on an upgrade). [#36877](https://github.com/ClickHouse/ClickHouse/pull/36877) ([nvartolomei](https://github.com/nvartolomei)).
+* Fix `skip_unavailable_shards` not working with the `hdfsCluster` table function. [#43236](https://github.com/ClickHouse/ClickHouse/pull/43236) ([chen](https://github.com/xiedeyantu)).
+* Fix S3 support for the question mark wildcard. Closes [#42731](https://github.com/ClickHouse/ClickHouse/issues/42731). [#43253](https://github.com/ClickHouse/ClickHouse/pull/43253) ([chen](https://github.com/xiedeyantu)).
+* Fix functions `arrayFirstOrNull` and `arrayLastOrNull` when the array is Nullable. [#43274](https://github.com/ClickHouse/ClickHouse/pull/43274) ([Duc Canh Le](https://github.com/canhld94)).
+* A new ZooKeeper path called "async_blocks" is created for replicated tables in [#43304](https://github.com/ClickHouse/ClickHouse/issues/43304). However, for tables created in older versions, this path does not exist and would cause an error when doing partition operations. This PR creates this node when initializing the replicated tree.
+This PR also adds a flag `async_insert_deduplicate` with a `false` default value to control whether to use this feature. As mentioned in [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075), the feature is not yet fully finished, so it is turned off by default. [#44223](https://github.com/ClickHouse/ClickHouse/pull/44223) ([Han Fei](https://github.com/hanfei1991)).
+
+#### Build/Testing/Packaging Improvement
+* Add support for FreeBSD/powerpc64le. [#40422](https://github.com/ClickHouse/ClickHouse/pull/40422) ([pkubaj](https://github.com/pkubaj)).
+* Bump Testcontainers for Go to v0.15.0. [#43278](https://github.com/ClickHouse/ClickHouse/pull/43278) ([Manuel de la Peña](https://github.com/mdelapenya)).
+* Enable base64 on s390x. [#43352](https://github.com/ClickHouse/ClickHouse/pull/43352) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
+* Shutdown will be much faster if `clearOldPartsFromFilesystem` is not called. This is especially true for tests with zero-copy replication due to single-threaded deletion of parts. `clearOldPartsFromFilesystem` is unnecessary after https://github.com/ClickHouse/ClickHouse/pull/41145. [#43760](https://github.com/ClickHouse/ClickHouse/pull/43760) ([Sema Checherinda](https://github.com/CheSema)).
+* Integrate skim into the client/local. [#43922](https://github.com/ClickHouse/ClickHouse/pull/43922) ([Azat Khuzhin](https://github.com/azat)).
+* Allow ClickHouse to use OpenSSL as a dynamic library and in-tree for development purposes. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)).
+* Closes [#43912](https://github.com/ClickHouse/ClickHouse/issues/43912). [#43992](https://github.com/ClickHouse/ClickHouse/pull/43992) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Kill stress tests after 2.5 hours in case of a hanging process. [#44214](https://github.com/ClickHouse/ClickHouse/pull/44214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Fixed being unable to log in (because of a failure to create the session_log entry) in a rare case of messed-up settings profiles. [#42641](https://github.com/ClickHouse/ClickHouse/pull/42641) ([Vasily Nemkov](https://github.com/Enmk)).
+* Fix incorrect UserTimeMicroseconds/SystemTimeMicroseconds accounting. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)).
+* Do not suppress exceptions in the web disk. Fix retries for the web disk. [#42800](https://github.com/ClickHouse/ClickHouse/pull/42800) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed a race condition between inserts and dropping materialized views. [#43161](https://github.com/ClickHouse/ClickHouse/pull/43161) ([AlfVII](https://github.com/AlfVII)).
+* Fixed a bug which could lead to a deadlock while using asynchronous inserts. [#43233](https://github.com/ClickHouse/ClickHouse/pull/43233) ([Anton Popov](https://github.com/CurtizJ)).
+* An additional check on zero uncompressed size was added to `CompressionCodecDelta`. [#43255](https://github.com/ClickHouse/ClickHouse/pull/43255) ([Nikita Taranov](https://github.com/nickitat)).
+* Fixed an issue that raised an exception while trying to read a Parquet file from S3 into ClickHouse. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)).
+* Fix a bad cast from a LowCardinality column when using short-circuit function execution. A proper fix of https://github.com/ClickHouse/ClickHouse/pull/42937. [#43311](https://github.com/ClickHouse/ClickHouse/pull/43311) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fixed queries with `SAMPLE BY` with the prewhere optimization on tables using the `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix `DESCRIBE` for the `deltaLake` and `hudi` table functions. [#43323](https://github.com/ClickHouse/ClickHouse/pull/43323) ([Antonio Andelic](https://github.com/antonio2368)).
+* Check and compare the content of the `format_version` file in `MergeTreeData` so tables can be loaded even if the storage policy was changed. [#43328](https://github.com/ClickHouse/ClickHouse/pull/43328) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix a possible (very unlikely) "No column to rollback" logical error during INSERT into Buffer. [#43336](https://github.com/ClickHouse/ClickHouse/pull/43336) ([Azat Khuzhin](https://github.com/azat)).
+* Fix a bug that allowed FunctionParser to parse an unlimited number of round brackets into one function if `allow_function_parameters` is set. [#43350](https://github.com/ClickHouse/ClickHouse/pull/43350) ([Nikolay Degterinsky](https://github.com/evillique)).
+* MaterializeMySQL supports the DDL `DROP TABLE t1, t2` and is compatible with most MySQL DROP DDL. [#43366](https://github.com/ClickHouse/ClickHouse/pull/43366) ([zzsmdfj](https://github.com/zzsmdfj)).
+* Fix a possible `Cannot create non-empty column with type Nothing` in the functions if/multiIf. Closes [#43356](https://github.com/ClickHouse/ClickHouse/issues/43356). [#43368](https://github.com/ClickHouse/ClickHouse/pull/43368) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix a bug when a row-level filter uses the default value of a column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)).
+* A query with DISTINCT + LIMIT BY + LIMIT could return fewer rows than expected. Fixes [#43377](https://github.com/ClickHouse/ClickHouse/issues/43377). [#43410](https://github.com/ClickHouse/ClickHouse/pull/43410) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix sumMap() for Nullable(Decimal()). [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)).
+* Fix date_diff() for hour/minute on macOS. Closes [#42742](https://github.com/ClickHouse/ClickHouse/issues/42742). [#43466](https://github.com/ClickHouse/ClickHouse/pull/43466) ([zzsmdfj](https://github.com/zzsmdfj)).
+* Fix incorrect memory accounting because of merges/mutations. [#43516](https://github.com/ClickHouse/ClickHouse/pull/43516) ([Azat Khuzhin](https://github.com/azat)).
+* Substitute UDFs in the `CREATE` query to avoid failures during loading at startup. Additionally, UDFs can now be used as `DEFAULT` expressions for columns. [#43539](https://github.com/ClickHouse/ClickHouse/pull/43539) ([Antonio Andelic](https://github.com/antonio2368)).
+* Correctly report errors in queries even when the multiple-JOINs optimization is taking place. [#43583](https://github.com/ClickHouse/ClickHouse/pull/43583) ([Salvatore](https://github.com/tbsal)).
+* Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)).
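+
+An illustrative shape of the fixed case (hypothetical table, not taken from the PR):
+
+```sql
+CREATE TABLE t (e Enum8('a' = 1, 'b' = 2)) ENGINE = MergeTree ORDER BY e;
+-- Primary key analysis now handles conditions of this form:
+SELECT count() FROM t WHERE toString(e) = 'a';
+```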
+* Ensure consistency when the copier updates the status and `attach_is_done` in Keeper after the partition attach is done. [#43602](https://github.com/ClickHouse/ClickHouse/pull/43602) ([lizhuoyu5](https://github.com/lzydmxy)).
+* During recovery of a lost replica, there could be a situation where two table names need to be swapped atomically (using EXCHANGE), but previously two RENAME queries were used instead, which obviously failed and, moreover, failed the whole recovery process of the database replica. [#43628](https://github.com/ClickHouse/ClickHouse/pull/43628) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix the s3Cluster function returning a NOT_FOUND_COLUMN_IN_BLOCK error. Closes [#43534](https://github.com/ClickHouse/ClickHouse/issues/43534). [#43629](https://github.com/ClickHouse/ClickHouse/pull/43629) ([chen](https://github.com/xiedeyantu)).
+* Optimized the number of List requests to ZooKeeper when selecting a part to merge. Previously it could produce thousands of requests in some cases. Fixes [#43647](https://github.com/ClickHouse/ClickHouse/issues/43647). [#43675](https://github.com/ClickHouse/ClickHouse/pull/43675) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix a possible logical error 'Array sizes mismatched' while parsing a JSON object with arrays that have the same key names but different nesting levels. Closes [#43569](https://github.com/ClickHouse/ClickHouse/issues/43569). [#43693](https://github.com/ClickHouse/ClickHouse/pull/43693) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fixed a possible exception in the case of a distributed GROUP BY with an alias column among the aggregation keys. [#43709](https://github.com/ClickHouse/ClickHouse/pull/43709) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix a bug which can lead to broken projections if zero-copy replication is enabled and used. [#43764](https://github.com/ClickHouse/ClickHouse/pull/43764) ([alesapin](https://github.com/alesapin)).
+* Fix using multipart upload for large S3 objects in AWS S3. [#43824](https://github.com/ClickHouse/ClickHouse/pull/43824) ([ianton-ru](https://github.com/ianton-ru)).
+* Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)).
+* Fix a logical error in a right storage JOIN with USING. [#43963](https://github.com/ClickHouse/ClickHouse/pull/43963) ([Vladimir C](https://github.com/vdimir)).
+* Keeper fix: throw if the interserver port for Raft is already in use. Fix a segfault in Prometheus when the Raft server failed to initialize. [#43984](https://github.com/ClickHouse/ClickHouse/pull/43984) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix ORDER BY with a positional argument in the case of unneeded column pruning. Closes [#43964](https://github.com/ClickHouse/ClickHouse/issues/43964). [#43987](https://github.com/ClickHouse/ClickHouse/pull/43987) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix a bug with the wrong order of keys in Storage Join. [#44012](https://github.com/ClickHouse/ClickHouse/pull/44012) ([Vladimir C](https://github.com/vdimir)).
+* Fixed an exception when a subquery contains HAVING but doesn't contain an actual aggregation. [#44051](https://github.com/ClickHouse/ClickHouse/pull/44051) ([Nikita Taranov](https://github.com/nickitat)).
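+
+An illustrative query of the affected kind (made up for demonstration, not from the PR):
+
+```sql
+-- A subquery with HAVING but no aggregate function used to throw an exception:
+SELECT x FROM (SELECT 1 AS x HAVING x = 1);
+```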
+* Fix a race in S3 multipart upload. This race could cause the error `Part number must be an integer between 1 and 10000, inclusive. (S3_ERROR)` while restoring from a backup. [#44065](https://github.com/ClickHouse/ClickHouse/pull/44065) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Prevent dropping a nested column if it creates an empty part. [#44159](https://github.com/ClickHouse/ClickHouse/pull/44159) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix `LOGICAL_ERROR` in the case when a fetch of a part was stopped while fetching a projection to a disk with zero-copy replication enabled. [#44173](https://github.com/ClickHouse/ClickHouse/pull/44173) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix a possible `Bad cast from type DB::IAST const* to DB::ASTLiteral const*`. Closes [#44191](https://github.com/ClickHouse/ClickHouse/issues/44191). [#44192](https://github.com/ClickHouse/ClickHouse/pull/44192) ([Kruglov Pavel](https://github.com/Avogar)).
+* Prevent the `ReadonlyReplica` metric from having negative values. [#44220](https://github.com/ClickHouse/ClickHouse/pull/44220) ([Antonio Andelic](https://github.com/antonio2368)).
+
+#### Build Improvement
+
+* Fixed endianness issues in hex string conversion on s390x (which is not supported by ClickHouse). [#41245](https://github.com/ClickHouse/ClickHouse/pull/41245) ([Harry Lee](https://github.com/HarryLeeIBM)).
+* The toDateTime64 conversion generated a wrong time on the z build; add a bit_cast swap fix to support toDateTime64 on the s390x platform. [#42847](https://github.com/ClickHouse/ClickHouse/pull/42847) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
+* s390x support for IP coding functions. [#43078](https://github.com/ClickHouse/ClickHouse/pull/43078) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
+* Fix a byte order issue of wide integers for s390x. [#43228](https://github.com/ClickHouse/ClickHouse/pull/43228) ([Harry Lee](https://github.com/HarryLeeIBM)).
+* Fixed an endianness issue in bloom filter serialization for s390x. [#43642](https://github.com/ClickHouse/ClickHouse/pull/43642) ([Harry Lee](https://github.com/HarryLeeIBM)).
+* Fixed setting TCP_KEEPIDLE of client connections for s390x. [#43850](https://github.com/ClickHouse/ClickHouse/pull/43850) ([Harry Lee](https://github.com/HarryLeeIBM)).
+* Fix an endianness issue in StringHashTable for s390x. [#44049](https://github.com/ClickHouse/ClickHouse/pull/44049) ([Harry Lee](https://github.com/HarryLeeIBM)).
+
+#### NO CL ENTRY
+
+* NO CL ENTRY: 'Revert "Revert "S3 request per second rate throttling""'. [#43335](https://github.com/ClickHouse/ClickHouse/pull/43335) ([Sergei Trifonov](https://github.com/serxa)).
+* NO CL ENTRY: 'Update version after release'. [#43348](https://github.com/ClickHouse/ClickHouse/pull/43348) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* NO CL ENTRY: 'Revert "Add table_uuid to system.parts"'. [#43571](https://github.com/ClickHouse/ClickHouse/pull/43571) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* NO CL ENTRY: 'Revert "Fix endian issue in integer hex string conversion"'. [#43613](https://github.com/ClickHouse/ClickHouse/pull/43613) ([Vladimir C](https://github.com/vdimir)).
+* NO CL ENTRY: 'Update replication.md'.
[#43643](https://github.com/ClickHouse/ClickHouse/pull/43643) ([Peignon Melvyn](https://github.com/melvynator)). +* NO CL ENTRY: 'Revert "Temporary files evict fs cache"'. [#43883](https://github.com/ClickHouse/ClickHouse/pull/43883) ([Vladimir C](https://github.com/vdimir)). +* NO CL ENTRY: 'Update html interface doc'. [#44064](https://github.com/ClickHouse/ClickHouse/pull/44064) ([San](https://github.com/santrancisco)). +* NO CL ENTRY: 'Revert "Add function 'age'"'. [#44203](https://github.com/ClickHouse/ClickHouse/pull/44203) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Builtin skim"'. [#44227](https://github.com/ClickHouse/ClickHouse/pull/44227) ([Azat Khuzhin](https://github.com/azat)). +* NO CL ENTRY: 'Revert "Add information about written rows in progress indicator"'. [#44255](https://github.com/ClickHouse/ClickHouse/pull/44255) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Build libcxx and libcxxabi from llvm-project [#42730](https://github.com/ClickHouse/ClickHouse/pull/42730) ([Robert Schulze](https://github.com/rschu1ze)). +* Allow release only from ready commits [#43019](https://github.com/ClickHouse/ClickHouse/pull/43019) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add global flags to base/ libraries [#43082](https://github.com/ClickHouse/ClickHouse/pull/43082) ([Raúl Marín](https://github.com/Algunenano)). +* Enable strict typing check in tests/ci [#43132](https://github.com/ClickHouse/ClickHouse/pull/43132) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add server UUID for disks access checks (read/read-by-offset/write/delete) to avoid possible races [#43143](https://github.com/ClickHouse/ClickHouse/pull/43143) ([Azat Khuzhin](https://github.com/azat)). +* Do not include libcxx library for C [#43166](https://github.com/ClickHouse/ClickHouse/pull/43166) ([Azat Khuzhin](https://github.com/azat)). +* Followup fixes for FuseFunctionsPass [#43217](https://github.com/ClickHouse/ClickHouse/pull/43217) ([Vladimir C](https://github.com/vdimir)). +* Fix bug in replication queue which can lead to premature mutation finish [#43231](https://github.com/ClickHouse/ClickHouse/pull/43231) ([alesapin](https://github.com/alesapin)). +* Support `CREATE / ALTER / DROP NAMED COLLECTION` queries under according access types [#43252](https://github.com/ClickHouse/ClickHouse/pull/43252) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix race in `IColumn::dumpStructure` [#43269](https://github.com/ClickHouse/ClickHouse/pull/43269) ([Anton Popov](https://github.com/CurtizJ)). +* Sanitize thirdparty libraries for public flags [#43275](https://github.com/ClickHouse/ClickHouse/pull/43275) ([Azat Khuzhin](https://github.com/azat)). +* stress: increase timeout for server waiting after TERM [#43277](https://github.com/ClickHouse/ClickHouse/pull/43277) ([Azat Khuzhin](https://github.com/azat)). +* Fix cloning of ASTIdentifier [#43282](https://github.com/ClickHouse/ClickHouse/pull/43282) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix race on write in `ReplicatedMergeTree` [#43289](https://github.com/ClickHouse/ClickHouse/pull/43289) ([Antonio Andelic](https://github.com/antonio2368)). +* Cancel lambda api url [#43295](https://github.com/ClickHouse/ClickHouse/pull/43295) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fixed: Typo [#43312](https://github.com/ClickHouse/ClickHouse/pull/43312) ([Raevsky Rudolf](https://github.com/lanesket)). 
+* Analyzer small fixes [#43321](https://github.com/ClickHouse/ClickHouse/pull/43321) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix: make test_read_only_table more stable [#43326](https://github.com/ClickHouse/ClickHouse/pull/43326) ([Igor Nikonov](https://github.com/devcrafter)). +* Make insertRangeFrom() more exception safe [#43338](https://github.com/ClickHouse/ClickHouse/pull/43338) ([Azat Khuzhin](https://github.com/azat)). +* Analyzer added indexes support [#43341](https://github.com/ClickHouse/ClickHouse/pull/43341) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow to "drop tables" from s3_plain disk (so as from web disk) [#43343](https://github.com/ClickHouse/ClickHouse/pull/43343) ([Azat Khuzhin](https://github.com/azat)). +* Add --max-consecutive-errors for clickhouse-benchmark [#43344](https://github.com/ClickHouse/ClickHouse/pull/43344) ([Azat Khuzhin](https://github.com/azat)). +* Add [#43072](https://github.com/ClickHouse/ClickHouse/issues/43072) [#43345](https://github.com/ClickHouse/ClickHouse/pull/43345) ([Nikita Taranov](https://github.com/nickitat)). +* Suggest users installation troubleshooting [#43346](https://github.com/ClickHouse/ClickHouse/pull/43346) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update version_date.tsv and changelogs after v22.11.1.1360-stable [#43349](https://github.com/ClickHouse/ClickHouse/pull/43349) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Provide full stacktrace in case of uncaught exception during server startup [#43364](https://github.com/ClickHouse/ClickHouse/pull/43364) ([Azat Khuzhin](https://github.com/azat)). +* Update SECURITY.md on new stable tags [#43365](https://github.com/ClickHouse/ClickHouse/pull/43365) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Splitting checks in CI more [#43373](https://github.com/ClickHouse/ClickHouse/pull/43373) ([alesapin](https://github.com/alesapin)). +* Update version_date.tsv and changelogs after v22.8.9.24-lts [#43393](https://github.com/ClickHouse/ClickHouse/pull/43393) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix mess with signed sizes in SingleValueDataString [#43401](https://github.com/ClickHouse/ClickHouse/pull/43401) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add a comment [#43403](https://github.com/ClickHouse/ClickHouse/pull/43403) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid race condition for updating system.distribution_queue values [#43406](https://github.com/ClickHouse/ClickHouse/pull/43406) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky 01926_order_by_desc_limit [#43408](https://github.com/ClickHouse/ClickHouse/pull/43408) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible heap-use-after-free in local if history file cannot be created [#43409](https://github.com/ClickHouse/ClickHouse/pull/43409) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test [#43435](https://github.com/ClickHouse/ClickHouse/pull/43435) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix backward compatibility check [#43436](https://github.com/ClickHouse/ClickHouse/pull/43436) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix typo [#43446](https://github.com/ClickHouse/ClickHouse/pull/43446) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove noise from logs about NetLink in Docker [#43447](https://github.com/ClickHouse/ClickHouse/pull/43447) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Modify test slightly [#43448](https://github.com/ClickHouse/ClickHouse/pull/43448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Set run_passes to 1 by default [#43451](https://github.com/ClickHouse/ClickHouse/pull/43451) ([Dmitry Novik](https://github.com/novikd)). +* Do not reuse jemalloc memory in test_global_overcommit [#43453](https://github.com/ClickHouse/ClickHouse/pull/43453) ([Dmitry Novik](https://github.com/novikd)). +* Fix createTableSharedID again [#43458](https://github.com/ClickHouse/ClickHouse/pull/43458) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Use smaller buffer for small files [#43460](https://github.com/ClickHouse/ClickHouse/pull/43460) ([Alexander Gololobov](https://github.com/davenger)). +* Merging [#42064](https://github.com/ClickHouse/ClickHouse/issues/42064) [#43461](https://github.com/ClickHouse/ClickHouse/pull/43461) ([Anton Popov](https://github.com/CurtizJ)). +* Use all parameters with prefixes from ssm [#43467](https://github.com/ClickHouse/ClickHouse/pull/43467) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Avoid possible DROP hung due to attached web disk [#43489](https://github.com/ClickHouse/ClickHouse/pull/43489) ([Azat Khuzhin](https://github.com/azat)). +* Improve fuzzy search in clickhouse-client/clickhouse-local [#43498](https://github.com/ClickHouse/ClickHouse/pull/43498) ([Azat Khuzhin](https://github.com/azat)). +* check ast limits for create_parser_fuzzer [#43504](https://github.com/ClickHouse/ClickHouse/pull/43504) ([Sema Checherinda](https://github.com/CheSema)). +* Add another test for SingleDataValueString [#43514](https://github.com/ClickHouse/ClickHouse/pull/43514) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Move password reset message from client to server [#43517](https://github.com/ClickHouse/ClickHouse/pull/43517) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Sync everything to persistent storage to avoid writeback affects perf tests [#43530](https://github.com/ClickHouse/ClickHouse/pull/43530) ([Azat Khuzhin](https://github.com/azat)). +* bump lib for diag [#43538](https://github.com/ClickHouse/ClickHouse/pull/43538) ([Dale McDiarmid](https://github.com/gingerwizard)). +* Temporarily disable `test_hive_query` [#43542](https://github.com/ClickHouse/ClickHouse/pull/43542) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer SumIfToCountIfPass fix [#43543](https://github.com/ClickHouse/ClickHouse/pull/43543) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer UniqInjectiveFunctionsEliminationPass [#43547](https://github.com/ClickHouse/ClickHouse/pull/43547) ([Maksim Kita](https://github.com/kitaisreal)). +* Disable broken 00176_bson_parallel_parsing [#43550](https://github.com/ClickHouse/ClickHouse/pull/43550) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add benchmark for query interpretation with JOINs [#43556](https://github.com/ClickHouse/ClickHouse/pull/43556) ([Raúl Marín](https://github.com/Algunenano)). +* Analyzer table functions untuple fix [#43572](https://github.com/ClickHouse/ClickHouse/pull/43572) ([Maksim Kita](https://github.com/kitaisreal)). +* Prepare CI for universal runners preallocated pool [#43579](https://github.com/ClickHouse/ClickHouse/pull/43579) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Iterate list without index-based access [#43584](https://github.com/ClickHouse/ClickHouse/pull/43584) ([Alexander Gololobov](https://github.com/davenger)). 
+* Remove code that I do not understand [#43593](https://github.com/ClickHouse/ClickHouse/pull/43593) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add table_uuid to system.parts (resubmit) [#43595](https://github.com/ClickHouse/ClickHouse/pull/43595) ([Azat Khuzhin](https://github.com/azat)). +* Move perf tests for Aarch64 from PRs to master [#43623](https://github.com/ClickHouse/ClickHouse/pull/43623) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky 01175_distributed_ddl_output_mode_long [#43626](https://github.com/ClickHouse/ClickHouse/pull/43626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Speedup backup config loading [#43627](https://github.com/ClickHouse/ClickHouse/pull/43627) ([Alexander Gololobov](https://github.com/davenger)). +* Fix [#43478](https://github.com/ClickHouse/ClickHouse/issues/43478) [#43636](https://github.com/ClickHouse/ClickHouse/pull/43636) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Do not checkout submodules recursively [#43637](https://github.com/ClickHouse/ClickHouse/pull/43637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Optimize binary-builder size [#43654](https://github.com/ClickHouse/ClickHouse/pull/43654) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix flaky `KeeperMap` integration tests [#43658](https://github.com/ClickHouse/ClickHouse/pull/43658) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix data race in `Keeper` snapshot [#43663](https://github.com/ClickHouse/ClickHouse/pull/43663) ([Antonio Andelic](https://github.com/antonio2368)). +* Use docker images cache from merged PRs in master and release branches [#43664](https://github.com/ClickHouse/ClickHouse/pull/43664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update AsynchronousReadIndirectBufferFromRemoteFS.cpp [#43667](https://github.com/ClickHouse/ClickHouse/pull/43667) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix pagination issue in GITHUB_JOB_ID() [#43681](https://github.com/ClickHouse/ClickHouse/pull/43681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Try fix flaky test 00176_bson_parallel_parsing [#43696](https://github.com/ClickHouse/ClickHouse/pull/43696) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix log messages in clickhouse-copier [#43707](https://github.com/ClickHouse/ClickHouse/pull/43707) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* try to remove clickhouse if already exists [#43728](https://github.com/ClickHouse/ClickHouse/pull/43728) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix 43622 [#43731](https://github.com/ClickHouse/ClickHouse/pull/43731) ([Amos Bird](https://github.com/amosbird)). +* Fix example of colored prompt in client [#43738](https://github.com/ClickHouse/ClickHouse/pull/43738) ([Azat Khuzhin](https://github.com/azat)). +* Minor fixes in annoy index documentation [#43743](https://github.com/ClickHouse/ClickHouse/pull/43743) ([Robert Schulze](https://github.com/rschu1ze)). +* Terminate lost runners [#43756](https://github.com/ClickHouse/ClickHouse/pull/43756) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update README.md [#43759](https://github.com/ClickHouse/ClickHouse/pull/43759) ([Tyler Hannan](https://github.com/tylerhannan)). +* Fix included_elements calculation in AggregateFunctionNullVariadic [#43763](https://github.com/ClickHouse/ClickHouse/pull/43763) ([Dmitry Novik](https://github.com/novikd)). 
+* Migrate runner_token_rotation_lambda to zip-package deployment [#43766](https://github.com/ClickHouse/ClickHouse/pull/43766) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Analyzer compound expression crash fix [#43768](https://github.com/ClickHouse/ClickHouse/pull/43768) ([Maksim Kita](https://github.com/kitaisreal)). +* Migrate termination lambda to zip-package [#43769](https://github.com/ClickHouse/ClickHouse/pull/43769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix flaky `test_store_cleanup` [#43770](https://github.com/ClickHouse/ClickHouse/pull/43770) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Attempt to fix StyleCheck condition [#43773](https://github.com/ClickHouse/ClickHouse/pull/43773) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Rerun PullRequestCI on changed description body [#43777](https://github.com/ClickHouse/ClickHouse/pull/43777) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Yet another fix for AggregateFunctionMinMaxAny [#43778](https://github.com/ClickHouse/ClickHouse/pull/43778) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add google benchmark to contrib [#43779](https://github.com/ClickHouse/ClickHouse/pull/43779) ([Nikita Taranov](https://github.com/nickitat)). +* Fix EN doc as in [#43765](https://github.com/ClickHouse/ClickHouse/issues/43765) [#43780](https://github.com/ClickHouse/ClickHouse/pull/43780) ([Alexander Gololobov](https://github.com/davenger)). +* Detach threads from thread group [#43781](https://github.com/ClickHouse/ClickHouse/pull/43781) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Try making `test_keeper_zookeeper_converter` less flaky [#43789](https://github.com/ClickHouse/ClickHouse/pull/43789) ([Antonio Andelic](https://github.com/antonio2368)). +* Polish UDF substitution visitor [#43790](https://github.com/ClickHouse/ClickHouse/pull/43790) ([Antonio Andelic](https://github.com/antonio2368)). +* Analyzer ConstantNode refactoring [#43793](https://github.com/ClickHouse/ClickHouse/pull/43793) ([Maksim Kita](https://github.com/kitaisreal)). +* Update Poco [#43802](https://github.com/ClickHouse/ClickHouse/pull/43802) ([Alexander Gololobov](https://github.com/davenger)). +* Add another BC check suppression [#43810](https://github.com/ClickHouse/ClickHouse/pull/43810) ([Alexander Tokmakov](https://github.com/tavplubix)). +* tests: fix 01676_long_clickhouse_client_autocomplete flakiness [#43819](https://github.com/ClickHouse/ClickHouse/pull/43819) ([Azat Khuzhin](https://github.com/azat)). +* Use disk operation to serialize and deserialize meta files of StorageFilelog [#43826](https://github.com/ClickHouse/ClickHouse/pull/43826) ([flynn](https://github.com/ucasfl)). +* Add constexpr [#43827](https://github.com/ClickHouse/ClickHouse/pull/43827) ([zhanglistar](https://github.com/zhanglistar)). +* Do not postpone removal of in-memory tables [#43833](https://github.com/ClickHouse/ClickHouse/pull/43833) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Increase some logging level for keeper client. [#43835](https://github.com/ClickHouse/ClickHouse/pull/43835) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* FuseFunctionsPass small fix [#43837](https://github.com/ClickHouse/ClickHouse/pull/43837) ([Maksim Kita](https://github.com/kitaisreal)). +* Followup fixes for XML helpers [#43845](https://github.com/ClickHouse/ClickHouse/pull/43845) ([Alexander Gololobov](https://github.com/davenger)). 
+* Hold ProcessListEntry a bit longer in case of exception from Interpreter [#43847](https://github.com/ClickHouse/ClickHouse/pull/43847) ([Alexander Tokmakov](https://github.com/tavplubix)). +* A little improve performance of PODArray [#43860](https://github.com/ClickHouse/ClickHouse/pull/43860) ([zhanglistar](https://github.com/zhanglistar)). +* Change email for robot-clickhouse to immutable one [#43861](https://github.com/ClickHouse/ClickHouse/pull/43861) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Rerun DocsCheck on edited PR description [#43862](https://github.com/ClickHouse/ClickHouse/pull/43862) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Temporarily disable misc-* slow clang-tidy checks [#43863](https://github.com/ClickHouse/ClickHouse/pull/43863) ([Robert Schulze](https://github.com/rschu1ze)). +* do not leave tmp part on disk, do not go to the keeper for remove it [#43866](https://github.com/ClickHouse/ClickHouse/pull/43866) ([Sema Checherinda](https://github.com/CheSema)). +* do not read part status just for logging [#43868](https://github.com/ClickHouse/ClickHouse/pull/43868) ([Sema Checherinda](https://github.com/CheSema)). +* Analyzer Context refactoring [#43884](https://github.com/ClickHouse/ClickHouse/pull/43884) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer CTE resolution fix [#43893](https://github.com/ClickHouse/ClickHouse/pull/43893) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve release script [#43894](https://github.com/ClickHouse/ClickHouse/pull/43894) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use only PRs to our repository in pr_info on push [#43895](https://github.com/ClickHouse/ClickHouse/pull/43895) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Join engine works with analyzer [#43897](https://github.com/ClickHouse/ClickHouse/pull/43897) ([Vladimir C](https://github.com/vdimir)). +* Fix reports [#43904](https://github.com/ClickHouse/ClickHouse/pull/43904) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix vim settings (and make it compatible with neovim) [#43909](https://github.com/ClickHouse/ClickHouse/pull/43909) ([Azat Khuzhin](https://github.com/azat)). +* Fix clang tidy errors introduced in [#43834](https://github.com/ClickHouse/ClickHouse/issues/43834) [#43911](https://github.com/ClickHouse/ClickHouse/pull/43911) ([Nikita Taranov](https://github.com/nickitat)). +* Fix BACKUP TO S3 for Google Cloud Storage [#43940](https://github.com/ClickHouse/ClickHouse/pull/43940) ([Azat Khuzhin](https://github.com/azat)). +* Fix tags workflow [#43942](https://github.com/ClickHouse/ClickHouse/pull/43942) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Generate missed changelogs for latest releases [#43944](https://github.com/ClickHouse/ClickHouse/pull/43944) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix typo in tests/ci/bugfix_validate_check.py [#43973](https://github.com/ClickHouse/ClickHouse/pull/43973) ([Vladimir C](https://github.com/vdimir)). +* Remove test logging of signal "EINTR" [#44001](https://github.com/ClickHouse/ClickHouse/pull/44001) ([Kruglov Pavel](https://github.com/Avogar)). +* Some cleanup of isDeterministic(InScopeOfQuery)() [#44011](https://github.com/ClickHouse/ClickHouse/pull/44011) ([Robert Schulze](https://github.com/rschu1ze)). +* Try to keep runners alive for longer [#44015](https://github.com/ClickHouse/ClickHouse/pull/44015) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Fix relaxed "too many parts" threshold [#44021](https://github.com/ClickHouse/ClickHouse/pull/44021) ([Sergei Trifonov](https://github.com/serxa)). +* Correct CompressionCodecGorilla exception message [#44023](https://github.com/ClickHouse/ClickHouse/pull/44023) ([Duc Canh Le](https://github.com/canhld94)). +* Fix exception message [#44034](https://github.com/ClickHouse/ClickHouse/pull/44034) ([Nikolay Degterinsky](https://github.com/evillique)). +* Update version_date.tsv and changelogs after v22.8.11.15-lts [#44035](https://github.com/ClickHouse/ClickHouse/pull/44035) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* do not hardlink serialization.json in new part [#44036](https://github.com/ClickHouse/ClickHouse/pull/44036) ([Sema Checherinda](https://github.com/CheSema)). +* Fix tracing of profile events [#44045](https://github.com/ClickHouse/ClickHouse/pull/44045) ([Anton Popov](https://github.com/CurtizJ)). +* Slightly better clickhouse disks and remove DiskMemory [#44050](https://github.com/ClickHouse/ClickHouse/pull/44050) ([alesapin](https://github.com/alesapin)). +* Assign release PRs [#44055](https://github.com/ClickHouse/ClickHouse/pull/44055) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Merging [#36877](https://github.com/ClickHouse/ClickHouse/issues/36877) [#44059](https://github.com/ClickHouse/ClickHouse/pull/44059) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* add changelogs [#44061](https://github.com/ClickHouse/ClickHouse/pull/44061) ([Dan Roscigno](https://github.com/DanRoscigno)). +* Fix the CACHE_PATH creation for default value [#44079](https://github.com/ClickHouse/ClickHouse/pull/44079) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix aspell [#44090](https://github.com/ClickHouse/ClickHouse/pull/44090) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix another bug in AggregateFunctionMinMaxAny [#44091](https://github.com/ClickHouse/ClickHouse/pull/44091) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer aggregate function lambda crash fix [#44098](https://github.com/ClickHouse/ClickHouse/pull/44098) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix -Wshorten-64-to-32 on FreeBSD and enable -Werror [#44121](https://github.com/ClickHouse/ClickHouse/pull/44121) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test `02497_trace_events_stress_long` [#44124](https://github.com/ClickHouse/ClickHouse/pull/44124) ([Anton Popov](https://github.com/CurtizJ)). +* Minor file renaming [#44125](https://github.com/ClickHouse/ClickHouse/pull/44125) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix typo [#44127](https://github.com/ClickHouse/ClickHouse/pull/44127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better descriptions of signals [#44129](https://github.com/ClickHouse/ClickHouse/pull/44129) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* make calls to be sure that parts are deleted [#44156](https://github.com/ClickHouse/ClickHouse/pull/44156) ([Sema Checherinda](https://github.com/CheSema)). +* Ignore "session expired" errors after BC check [#44157](https://github.com/ClickHouse/ClickHouse/pull/44157) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix incorrect assertion [#44160](https://github.com/ClickHouse/ClickHouse/pull/44160) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Close GRPC channels in tests [#44184](https://github.com/ClickHouse/ClickHouse/pull/44184) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Remove misleading message from logs [#44190](https://github.com/ClickHouse/ClickHouse/pull/44190) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Minor clang-tidy fixes in fromUnixTimestamp64() [#44194](https://github.com/ClickHouse/ClickHouse/pull/44194) ([Igor Nikonov](https://github.com/devcrafter)).
+* Hotfix for "check_status.tsv doesn't exists" in stress tests [#44197](https://github.com/ClickHouse/ClickHouse/pull/44197) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix documentation after [#42438](https://github.com/ClickHouse/ClickHouse/issues/42438) [#44200](https://github.com/ClickHouse/ClickHouse/pull/44200) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix an assertion in transactions [#44202](https://github.com/ClickHouse/ClickHouse/pull/44202) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Add log message [#44237](https://github.com/ClickHouse/ClickHouse/pull/44237) ([Alexander Tokmakov](https://github.com/tavplubix)).
+
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index 33d11091660..1614fb1a8b4 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -37,7 +37,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 #include
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 574e9bfa62e..db17a6a7bee 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -60,7 +60,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 #include
diff --git a/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp b/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp
new file mode 100644
index 00000000000..5fc6b21926e
--- /dev/null
+++ b/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp
@@ -0,0 +1,647 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int FUNCTION_NOT_ALLOWED;
+    extern const int NOT_IMPLEMENTED;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+struct AggregateFunctionFlameGraphTree
+{
+    struct ListNode;
+
+    struct TreeNode
+    {
+        TreeNode * parent = nullptr;
+        ListNode * children = nullptr;
+        UInt64 ptr = 0;
+        size_t allocated = 0;
+    };
+
+    struct ListNode
+    {
+        ListNode * next = nullptr;
+        TreeNode * child = nullptr;
+    };
+
+    TreeNode root;
+
+    static ListNode * createChild(TreeNode * parent, UInt64 ptr, Arena * arena)
+    {
+        ListNode * list_node = reinterpret_cast<ListNode *>(arena->alloc(sizeof(ListNode)));
+        TreeNode * tree_node = reinterpret_cast<TreeNode *>(arena->alloc(sizeof(TreeNode)));
+
+        list_node->child = tree_node;
+        list_node->next = nullptr;
+
+        tree_node->parent = parent;
+        tree_node->children = nullptr;
+        tree_node->ptr = ptr;
+        tree_node->allocated = 0;
+
+        return list_node;
+    }
+
+    TreeNode * find(const UInt64 * stack, size_t stack_size, Arena * arena)
+    {
+        TreeNode * node = &root;
+        for (size_t i = 0; i < stack_size; ++i)
+        {
+            UInt64 ptr = stack[i];
+            if (ptr == 0)
+                break;
+
+            if (!node->children)
+            {
+                node->children = createChild(node, ptr, arena);
+                node = node->children->child;
+            }
+            else
+            {
+                ListNode * list = node->children;
+                while (list->child->ptr != ptr && list->next)
+                    list = list->next;
+
+                if (list->child->ptr != ptr)
+                {
+                    list->next = createChild(node, ptr, arena);
+                    list = list->next;
+                }
+
+                node = list->child;
+            }
+        }
+
+        return node;
+    }
+
+    static void append(DB::PaddedPODArray<UInt64> & values, DB::PaddedPODArray<UInt64> & offsets, std::vector<UInt64> & frame)
+    {
+        UInt64 prev = offsets.empty() ? 0 : offsets.back();
+        offsets.push_back(prev + frame.size());
+        for (UInt64 val : frame)
+            values.push_back(val);
+    }
+
+    struct Trace
+    {
+        using Frames = std::vector<UInt64>;
+
+        Frames frames;
+
+        /// The total number of bytes allocated for traces with the same prefix.
+        size_t allocated_total = 0;
+        /// This counter is relevant in case we want to filter some traces with a small amount of bytes.
+        /// It shows the total number of bytes for *filtered* traces with the same prefix.
+        /// This is the value which is used in the flamegraph.
+        size_t allocated_self = 0;
+    };
+
+    using Traces = std::vector<Trace>;
+
+    Traces dump(size_t max_depth, size_t min_bytes) const
+    {
+        Traces traces;
+        Trace::Frames frames;
+        std::vector<size_t> allocated_total;
+        std::vector<size_t> allocated_self;
+        std::vector<ListNode *> nodes;
+
+        nodes.push_back(root.children);
+        allocated_total.push_back(root.allocated);
+        allocated_self.push_back(root.allocated);
+
+        while (!nodes.empty())
+        {
+            if (nodes.back() == nullptr)
+            {
+                traces.push_back({frames, allocated_total.back(), allocated_self.back()});
+
+                nodes.pop_back();
+                allocated_total.pop_back();
+                allocated_self.pop_back();
+
+                /// We don't have root's frame so frames are empty in the end.
+                if (!frames.empty())
+                    frames.pop_back();
+
+                continue;
+            }
+
+            TreeNode * current = nodes.back()->child;
+            nodes.back() = nodes.back()->next;
+
+            bool enough_bytes = current->allocated >= min_bytes;
+            bool enough_depth = max_depth == 0 || nodes.size() < max_depth;
+
+            if (enough_bytes)
+            {
+                frames.push_back(current->ptr);
+                allocated_self.back() -= current->allocated;
+
+                if (enough_depth)
+                {
+                    allocated_total.push_back(current->allocated);
+                    allocated_self.push_back(current->allocated);
+                    nodes.push_back(current->children);
+                }
+                else
+                {
+                    traces.push_back({frames, current->allocated, current->allocated});
+                    frames.pop_back();
+                }
+            }
+        }
+
+        return traces;
+    }
+};
+
+static void insertData(DB::PaddedPODArray<UInt8> & chars, DB::PaddedPODArray<UInt64> & offsets, const char * pos, size_t length)
+{
+    const size_t old_size = chars.size();
+    const size_t new_size = old_size + length + 1;
+
+    chars.resize(new_size);
+    if (length)
+        memcpy(chars.data() + old_size, pos, length);
+    chars[old_size + length] = 0;
+    offsets.push_back(new_size);
+}
+
+/// Split str by line feed and write each part as a separate row to ColumnString.
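+/// For example, fillColumn(chars, offsets, "a\nbc\n") appends two rows to the
+/// ColumnString backing arrays: "a" and "bc"; a trailing line feed does not
+/// produce an extra empty row.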
+static void fillColumn(DB::PaddedPODArray<UInt8> & chars, DB::PaddedPODArray<UInt64> & offsets, const std::string & str)
+{
+    size_t start = 0;
+    size_t end = 0;
+    size_t size = str.size();
+
+    while (end < size)
+    {
+        if (str[end] == '\n')
+        {
+            insertData(chars, offsets, str.data() + start, end - start);
+            start = end + 1;
+        }
+
+        ++end;
+    }
+
+    if (start < end)
+        insertData(chars, offsets, str.data() + start, end - start);
+}
+
+void dumpFlameGraph(
+    const AggregateFunctionFlameGraphTree::Traces & traces,
+    DB::PaddedPODArray<UInt8> & chars,
+    DB::PaddedPODArray<UInt64> & offsets)
+{
+    DB::WriteBufferFromOwnString out;
+
+    std::unordered_map mapping;
+
+#if defined(__ELF__) && !defined(OS_FREEBSD)
+    auto symbol_index_ptr = DB::SymbolIndex::instance();
+    const DB::SymbolIndex & symbol_index = *symbol_index_ptr;
+#endif
+
+    for (const auto & trace : traces)
+    {
+        if (trace.allocated_self == 0)
+            continue;
+
+        for (size_t i = 0; i < trace.frames.size(); ++i)
+        {
+            if (i)
+                out << ";";
+
+            const void * ptr = reinterpret_cast<const void *>(trace.frames[i]);
+
+#if defined(__ELF__) && !defined(OS_FREEBSD)
+            if (const auto * symbol = symbol_index.findSymbol(ptr))
+                writeString(demangle(symbol->name), out);
+            else
+                DB::writePointerHex(ptr, out);
+#else
+            DB::writePointerHex(ptr, out);
+#endif
+        }
+
+        out << ' ' << trace.allocated_self << "\n";
+    }
+
+    fillColumn(chars, offsets, out.str());
+}
+
+struct AggregateFunctionFlameGraphData
+{
+    struct Entry
+    {
+        AggregateFunctionFlameGraphTree::TreeNode * trace;
+        UInt64 size;
+        Entry * next = nullptr;
+    };
+
+    struct Pair
+    {
+        Entry * allocation = nullptr;
+        Entry * deallocation = nullptr;
+    };
+
+    using Entries = HashMap<UInt64, Pair>;
+
+    AggregateFunctionFlameGraphTree tree;
+    Entries entries;
+    Entry * free_list = nullptr;
+
+    Entry * alloc(Arena * arena)
+    {
+        if (free_list)
+        {
+            auto * res = free_list;
+            free_list = free_list->next;
+            return res;
+        }
+
+        return reinterpret_cast<Entry *>(arena->alloc(sizeof(Entry)));
+    }
+
+    void release(Entry * entry)
+    {
+        entry->next = free_list;
+        free_list = entry;
+    }
+
+    static void track(Entry * allocation)
+    {
+        auto * node = allocation->trace;
+        while (node)
+        {
+            node->allocated += allocation->size;
+            node = node->parent;
+        }
+    }
+
+    static void untrack(Entry * allocation)
+    {
+        auto * node = allocation->trace;
+        while (node)
+        {
+            node->allocated -= allocation->size;
+            node = node->parent;
+        }
+    }
+
+    static Entry * tryFindMatchAndRemove(Entry *& list, UInt64 size)
+    {
+        if (!list)
+            return nullptr;
+
+        if (list->size == size)
+        {
+            Entry * entry = list;
+            list = list->next;
+            return entry;
+        }
+        else
+        {
+            Entry * parent = list;
+            while (parent->next && parent->next->size != size)
+                parent = parent->next;
+
+            if (parent->next && parent->next->size == size)
+            {
+                Entry * entry = parent->next;
+                parent->next = entry->next;
+                return entry;
+            }
+
+            return nullptr;
+        }
+    }
+
+    void add(UInt64 ptr, Int64 size, const UInt64 * stack, size_t stack_size, Arena * arena)
+    {
+        /// If the ptr argument is nullptr, only track allocations.
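+        /// Otherwise allocations and deallocations are matched by (ptr, size): a pending
+        /// deallocation of the same size at the same address cancels an allocation, and an
+        /// unmatched deallocation is parked in place.deallocation until its allocation
+        /// arrives, because merged states may deliver the events out of order.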
+        if (ptr == 0)
+        {
+            if (size > 0)
+            {
+                auto * node = tree.find(stack, stack_size, arena);
+                Entry entry{.trace = node, .size = UInt64(size)};
+                track(&entry);
+            }
+
+            return;
+        }
+
+        auto & place = entries[ptr];
+        if (size > 0)
+        {
+            if (auto * deallocation = tryFindMatchAndRemove(place.deallocation, size))
+            {
+                release(deallocation);
+            }
+            else
+            {
+                auto * node = tree.find(stack, stack_size, arena);
+
+                auto * allocation = alloc(arena);
+                allocation->size = UInt64(size);
+                allocation->trace = node;
+
+                track(allocation);
+
+                allocation->next = place.allocation;
+                place.allocation = allocation;
+            }
+        }
+        else if (size < 0)
+        {
+            UInt64 abs_size = -size;
+            if (auto * allocation = tryFindMatchAndRemove(place.allocation, abs_size))
+            {
+                untrack(allocation);
+                release(allocation);
+            }
+            else
+            {
+                auto * deallocation = alloc(arena);
+                deallocation->size = abs_size;
+
+                deallocation->next = place.deallocation;
+                place.deallocation = deallocation;
+            }
+        }
+    }
+
+    void merge(const AggregateFunctionFlameGraphTree & other_tree, Arena * arena)
+    {
+        AggregateFunctionFlameGraphTree::Trace::Frames frames;
+        std::vector<AggregateFunctionFlameGraphTree::ListNode *> nodes;
+
+        nodes.push_back(other_tree.root.children);
+
+        while (!nodes.empty())
+        {
+            if (nodes.back() == nullptr)
+            {
+                nodes.pop_back();
+
+                /// We don't have root's frame so frames are empty in the end.
+                if (!frames.empty())
+                    frames.pop_back();
+
+                continue;
+            }
+
+            AggregateFunctionFlameGraphTree::TreeNode * current = nodes.back()->child;
+            nodes.back() = nodes.back()->next;
+
+            frames.push_back(current->ptr);
+
+            if (current->children)
+                nodes.push_back(current->children);
+            else
+            {
+                if (current->allocated)
+                    add(0, current->allocated, frames.data(), frames.size(), arena);
+
+                frames.pop_back();
+            }
+        }
+    }
+
+    void merge(const AggregateFunctionFlameGraphData & other, Arena * arena)
+    {
+        AggregateFunctionFlameGraphTree::Trace::Frames frames;
+        for (const auto & entry : other.entries)
+        {
+            for (auto * allocation = entry.value.second.allocation; allocation; allocation = allocation->next)
+            {
+                frames.clear();
+                const auto * node = allocation->trace;
+                while (node->ptr)
+                {
+                    frames.push_back(node->ptr);
+                    node = node->parent;
+                }
+
+                std::reverse(frames.begin(), frames.end());
+                add(entry.value.first, allocation->size, frames.data(), frames.size(), arena);
+                untrack(allocation);
+            }
+
+            for (auto * deallocation = entry.value.second.deallocation; deallocation; deallocation = deallocation->next)
+            {
+                add(entry.value.first, -Int64(deallocation->size), nullptr, 0, arena);
+            }
+        }
+
+        merge(other.tree, arena);
+    }
+
+    void dumpFlameGraph(
+        DB::PaddedPODArray<UInt8> & chars,
+        DB::PaddedPODArray<UInt64> & offsets,
+        size_t max_depth, size_t min_bytes) const
+    {
+        DB::dumpFlameGraph(tree.dump(max_depth, min_bytes), chars, offsets);
+    }
+};
+
+/// Aggregate function which builds a flamegraph using the list of stacktraces.
+/// The output is an array of strings which can be used by the flamegraph.pl util.
+/// See https://github.com/brendangregg/FlameGraph
+///
+/// Syntax: flameGraph(traces, [size = 1], [ptr = 0])
+/// - trace : Array(UInt64), a stacktrace
+/// - size : Int64, an allocation size (for memory profiling)
+/// - ptr : UInt64, an allocation address
+/// If ptr != 0, flameGraph will match allocations (size > 0) with deallocations (size < 0) that have the same size and ptr.
+/// Only allocations which were not freed are shown. Unmatched deallocations are ignored.
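+/// The produced rows are in the collapsed-stack format consumed by flamegraph.pl:
+/// 'frame_1;frame_2;...;frame_N <allocated_self bytes>', one row per stack with a
+/// non-zero self size (see dumpFlameGraph above).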
+///
+/// Usage:
+///
+/// * Build a flamegraph based on the CPU query profiler
+/// set query_profiler_cpu_time_period_ns=10000000;
+/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
+/// clickhouse client --allow_introspection_functions=1
+///     -q "select arrayJoin(flameGraph(arrayReverse(trace))) from system.trace_log where trace_type = 'CPU' and query_id = 'xxx'"
+///     | ~/dev/FlameGraph/flamegraph.pl > flame_cpu.svg
+///
+/// * Build a flamegraph based on the memory query profiler, showing all allocations
+/// set memory_profiler_sample_probability=1, max_untracked_memory=1;
+/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
+/// clickhouse client --allow_introspection_functions=1
+///     -q "select arrayJoin(flameGraph(trace, size)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'"
+///     | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem.svg
+///
+/// * Build a flamegraph based on the memory query profiler, showing allocations which were not deallocated in the query context
+/// set memory_profiler_sample_probability=1, max_untracked_memory=1, use_uncompressed_cache=1, merge_tree_max_rows_to_use_cache=100000000000, merge_tree_max_bytes_to_use_cache=1000000000000;
+/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
+/// clickhouse client --allow_introspection_functions=1
+///     -q "select arrayJoin(flameGraph(trace, size, ptr)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'"
+///     | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_untracked.svg
+///
+/// * Build a flamegraph based on the memory query profiler, showing active allocations at a fixed point in time
+/// set memory_profiler_sample_probability=1, max_untracked_memory=1;
+/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
+/// 1. Memory usage per second
+/// select event_time, m, formatReadableSize(max(s) as m) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample') group by event_time order by event_time;
+/// 2. Find a time point with maximal memory usage
+/// select argMax(event_time, s), max(s) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample');
+/// 3. Fix active allocations at the fixed point in time
+/// clickhouse client --allow_introspection_functions=1
+///     -q "select arrayJoin(flameGraph(trace, size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time <= 'yyy' order by event_time)"
+///     | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_pos.svg
+/// 4. Find deallocations at the fixed point in time
+/// clickhouse client --allow_introspection_functions=1
+///     -q "select arrayJoin(flameGraph(trace, -size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time > 'yyy' order by event_time desc)"
+///     | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_neg.svg
+class AggregateFunctionFlameGraph final : public IAggregateFunctionDataHelper<AggregateFunctionFlameGraphData, AggregateFunctionFlameGraph>
+{
+public:
+    explicit AggregateFunctionFlameGraph(const DataTypes & argument_types_)
+        : IAggregateFunctionDataHelper<AggregateFunctionFlameGraphData, AggregateFunctionFlameGraph>(argument_types_, {})
+    {}
+
+    String getName() const override { return "flameGraph"; }
+
+    DataTypePtr getReturnType() const override
+    {
+        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
+    }
+
+    bool allocatesMemoryInArena() const override { return true; }
+
+    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    {
+        const auto * trace = typeid_cast<const ColumnArray *>(columns[0]);
+
+        const auto & trace_offsets = trace->getOffsets();
+        const auto & trace_values = typeid_cast<const ColumnUInt64 *>(&trace->getData())->getData();
+        UInt64 prev_offset = 0;
+        if (row_num)
+            prev_offset = trace_offsets[row_num - 1];
+        UInt64 trace_size = trace_offsets[row_num] - prev_offset;
+
+        Int64 allocated = 1;
+        if (argument_types.size() >= 2)
+        {
+            const auto & sizes = typeid_cast<const ColumnInt64 *>(columns[1])->getData();
+            allocated = sizes[row_num];
+        }
+
+        UInt64 ptr = 0;
+        if (argument_types.size() >= 3)
+        {
+            const auto & ptrs = typeid_cast<const ColumnUInt64 *>(columns[2])->getData();
+            ptr = ptrs[row_num];
+        }
+
+        this->data(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena);
+    }
+
+    void addManyDefaults(
+        AggregateDataPtr __restrict /*place*/,
+        const IColumn ** /*columns*/,
+        size_t /*length*/,
+        Arena * /*arena*/) const override
+    {
+    }
+
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    {
+        this->data(place).merge(this->data(rhs), arena);
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional<size_t> /* version */) const override
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Serialization for function flameGraph is not implemented.");
+    }
+
+    void deserialize(AggregateDataPtr __restrict, ReadBuffer &, std::optional<size_t> /* version */, Arena *) const override
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Deserialization for function flameGraph is not implemented.");
+    }
+
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
+    {
+        auto & array = assert_cast<ColumnArray &>(to);
+        auto & str = assert_cast<ColumnString &>(array.getData());
+
+        this->data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0);
+
+        array.getOffsets().push_back(str.size());
+    }
+};
+
+static void check(const std::string & name, const DataTypes & argument_types, const Array & params)
+{
+    assertNoParameters(name, params);
+
+    if (argument_types.empty() || argument_types.size() > 3)
+        throw Exception(
+            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+            "Aggregate function {} requires 1 to 3 arguments: trace, [size = 1], [ptr = 0]",
+            name);
+
+    auto ptr_type = std::make_shared<DataTypeUInt64>();
+    auto trace_type = std::make_shared<DataTypeArray>(ptr_type);
+    auto size_type = std::make_shared<DataTypeInt64>();
+
+    if (!argument_types[0]->equals(*trace_type))
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+            "First argument (trace) for function {} must be Array(UInt64), but it has type {}",
+            name, argument_types[0]->getName());
+
+    if (argument_types.size() >= 2 && !argument_types[1]->equals(*size_type))
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+            "Second argument (size) for function {} must be Int64, but it has type {}",
+            name, argument_types[1]->getName());
+
+    if (argument_types.size() >= 3 && !argument_types[2]->equals(*ptr_type))
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+            "Third argument (ptr) for function {} must be UInt64, but it has type {}",
+            name, argument_types[2]->getName());
+}
+
+AggregateFunctionPtr createAggregateFunctionFlameGraph(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings * settings)
+{
+    if (!settings->allow_introspection_functions)
+        throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED,
+            "Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0");
+
+    check(name, argument_types, params);
+    return std::make_shared<AggregateFunctionFlameGraph>(argument_types);
+}
+
+void registerAggregateFunctionFlameGraph(AggregateFunctionFactory & factory)
+{
+    AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true };
+
+    factory.registerFunction("flameGraph", { createAggregateFunctionFlameGraph, properties });
+}
+
+}
diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp
index ecf6ab51367..08524cc9f97 100644
--- a/src/AggregateFunctions/registerAggregateFunctions.cpp
+++ b/src/AggregateFunctions/registerAggregateFunctions.cpp
@@ -73,6 +73,7 @@ void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory
 void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
 void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
 void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
+void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &);
 
 class AggregateFunctionCombinatorFactory;
 void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@@ -158,6 +159,7 @@ void registerAggregateFunctions()
     registerAggregateFunctionExponentialMovingAverage(factory);
     registerAggregateFunctionSparkbar(factory);
     registerAggregateFunctionAnalysisOfVariance(factory);
+    registerAggregateFunctionFlameGraph(factory);
 
     registerWindowFunctions(factory);
 }
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index bd8b221e2ba..99c3c0c3fa2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -106,6 +106,7 @@ if (TARGET ch_contrib::nats_io)
 endif()
 
 add_headers_and_sources(dbms Storages/MeiliSearch)
+add_headers_and_sources(dbms Storages/NamedCollections)
 
 if (TARGET ch_contrib::amqp_cpp)
     add_headers_and_sources(dbms Storages/RabbitMQ)
diff --git a/src/Common/AllocationTrace.h b/src/Common/AllocationTrace.h
new file mode 100644
index 00000000000..332808c8015
--- /dev/null
+++ b/src/Common/AllocationTrace.h
@@ -0,0 +1,16 @@
+#pragma once
+#include
+
+/// This is a structure which is returned by MemoryTracker.
+/// Methods onAlloc/onFree should be called after the actual memory allocation if it succeeds.
+/// For now, it will only collect the allocation trace with sample_probability.
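+/// The expected call pattern (a sketch of what Allocator.h and new_delete.cpp do below):
+///     auto trace = CurrentMemoryTracker::alloc(size);
+///     void * ptr = allocNoTrack(size, alignment);
+///     trace.onAlloc(ptr, size);
+/// and, symmetrically, CurrentMemoryTracker::free(size) followed by trace.onFree(ptr, size).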
+struct AllocationTrace +{ + AllocationTrace() = default; + explicit AllocationTrace(double sample_probability_); + + void onAlloc(void * ptr, size_t size) const; + void onFree(void * ptr, size_t size) const; + + double sample_probability = 0; +}; diff --git a/src/Common/Allocator.h b/src/Common/Allocator.h index c348eaea006..8c4f2ef1690 100644 --- a/src/Common/Allocator.h +++ b/src/Common/Allocator.h @@ -92,8 +92,10 @@ public: void * alloc(size_t size, size_t alignment = 0) { checkSize(size); - CurrentMemoryTracker::alloc(size); - return allocNoTrack(size, alignment); + auto trace = CurrentMemoryTracker::alloc(size); + void * ptr = allocNoTrack(size, alignment); + trace.onAlloc(ptr, size); + return ptr; } /// Free memory range. @@ -103,7 +105,8 @@ public: { checkSize(size); freeNoTrack(buf, size); - CurrentMemoryTracker::free(size); + auto trace = CurrentMemoryTracker::free(size); + trace.onFree(buf, size); } catch (...) { @@ -129,13 +132,16 @@ public: && alignment <= MALLOC_MIN_ALIGNMENT) { /// Resize malloc'd memory region with no special alignment requirement. - CurrentMemoryTracker::realloc(old_size, new_size); + auto trace = CurrentMemoryTracker::realloc(old_size, new_size); + trace.onFree(buf, old_size); void * new_buf = ::realloc(buf, new_size); if (nullptr == new_buf) DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); buf = new_buf; + trace.onAlloc(buf, new_size); + if constexpr (clear_memory) if (new_size > old_size) memset(reinterpret_cast(buf) + old_size, 0, new_size - old_size); @@ -143,7 +149,8 @@ public: else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD) { /// Resize mmap'd memory region. - CurrentMemoryTracker::realloc(old_size, new_size); + auto trace = CurrentMemoryTracker::realloc(old_size, new_size); + trace.onFree(buf, old_size); // On apple and freebsd self-implemented mremap used (common/mremap.h) buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, @@ -152,14 +159,17 @@ public: DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP); + trace.onAlloc(buf, new_size); /// No need for zero-fill, because mmap guarantees it. } else if (new_size < MMAP_THRESHOLD) { /// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once. 
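+        /// For the sampler, a reallocation is reported as onFree() of the old buffer
+        /// followed by onAlloc() of the new one, so MemorySample events stay balanced
+        /// per address.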
- CurrentMemoryTracker::realloc(old_size, new_size); + auto trace = CurrentMemoryTracker::realloc(old_size, new_size); + trace.onFree(buf, old_size); void * new_buf = allocNoTrack(new_size, alignment); + trace.onAlloc(new_buf, new_size); memcpy(new_buf, buf, std::min(old_size, new_size)); freeNoTrack(buf, old_size); buf = new_buf; diff --git a/src/Common/AllocatorWithMemoryTracking.h b/src/Common/AllocatorWithMemoryTracking.h index 815c326ed62..b43870e05b2 100644 --- a/src/Common/AllocatorWithMemoryTracking.h +++ b/src/Common/AllocatorWithMemoryTracking.h @@ -30,21 +30,24 @@ struct AllocatorWithMemoryTracking throw std::bad_alloc(); size_t bytes = n * sizeof(T); - CurrentMemoryTracker::alloc(bytes); + auto trace = CurrentMemoryTracker::alloc(bytes); T * p = static_cast(malloc(bytes)); if (!p) throw std::bad_alloc(); + trace.onAlloc(p, bytes); + return p; } void deallocate(T * p, size_t n) noexcept { - free(p); - size_t bytes = n * sizeof(T); - CurrentMemoryTracker::free(bytes); + + free(p); + auto trace = CurrentMemoryTracker::free(bytes); + trace.onFree(p, bytes); } }; diff --git a/src/Common/CurrentMemoryTracker.cpp b/src/Common/CurrentMemoryTracker.cpp index 720df07efb9..0147a095185 100644 --- a/src/Common/CurrentMemoryTracker.cpp +++ b/src/Common/CurrentMemoryTracker.cpp @@ -37,7 +37,7 @@ MemoryTracker * getMemoryTracker() using DB::current_thread; -void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) +AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) { #ifdef MEMORY_TRACKER_DEBUG_CHECKS if (unlikely(memory_tracker_always_throw_logical_error_on_allocation)) @@ -55,8 +55,9 @@ void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) if (will_be > current_thread->untracked_memory_limit) { - memory_tracker->allocImpl(will_be, throw_if_memory_exceeded); + auto res = memory_tracker->allocImpl(will_be, throw_if_memory_exceeded); current_thread->untracked_memory = 0; + return res; } else { @@ -68,36 +69,40 @@ void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) /// total_memory_tracker only, ignore untracked_memory else { - memory_tracker->allocImpl(size, throw_if_memory_exceeded); + return memory_tracker->allocImpl(size, throw_if_memory_exceeded); } + + return AllocationTrace(memory_tracker->getSampleProbability()); } + + return AllocationTrace(0); } void CurrentMemoryTracker::check() { if (auto * memory_tracker = getMemoryTracker()) - memory_tracker->allocImpl(0, true); + std::ignore = memory_tracker->allocImpl(0, true); } -void CurrentMemoryTracker::alloc(Int64 size) +AllocationTrace CurrentMemoryTracker::alloc(Int64 size) { bool throw_if_memory_exceeded = true; - allocImpl(size, throw_if_memory_exceeded); + return allocImpl(size, throw_if_memory_exceeded); } -void CurrentMemoryTracker::allocNoThrow(Int64 size) +AllocationTrace CurrentMemoryTracker::allocNoThrow(Int64 size) { bool throw_if_memory_exceeded = false; - allocImpl(size, throw_if_memory_exceeded); + return allocImpl(size, throw_if_memory_exceeded); } -void CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size) +AllocationTrace CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size) { Int64 addition = new_size - old_size; - addition > 0 ? alloc(addition) : free(-addition); + return addition > 0 ? 
alloc(addition) : free(-addition); } -void CurrentMemoryTracker::free(Int64 size) +AllocationTrace CurrentMemoryTracker::free(Int64 size) { if (auto * memory_tracker = getMemoryTracker()) { @@ -106,15 +111,20 @@ void CurrentMemoryTracker::free(Int64 size) current_thread->untracked_memory -= size; if (current_thread->untracked_memory < -current_thread->untracked_memory_limit) { - memory_tracker->free(-current_thread->untracked_memory); + Int64 untracked_memory = current_thread->untracked_memory; current_thread->untracked_memory = 0; + return memory_tracker->free(-untracked_memory); } } /// total_memory_tracker only, ignore untracked_memory else { - memory_tracker->free(size); + return memory_tracker->free(size); } + + return AllocationTrace(memory_tracker->getSampleProbability()); } + + return AllocationTrace(0); } diff --git a/src/Common/CurrentMemoryTracker.h b/src/Common/CurrentMemoryTracker.h index e125e4cbe4a..ba46f458e4a 100644 --- a/src/Common/CurrentMemoryTracker.h +++ b/src/Common/CurrentMemoryTracker.h @@ -1,19 +1,20 @@ #pragma once #include +#include /// Convenience methods, that use current thread's memory_tracker if it is available. struct CurrentMemoryTracker { /// Call the following functions before calling of corresponding operations with memory allocators. - static void alloc(Int64 size); - static void allocNoThrow(Int64 size); - static void realloc(Int64 old_size, Int64 new_size); + [[nodiscard]] static AllocationTrace alloc(Int64 size); + [[nodiscard]] static AllocationTrace allocNoThrow(Int64 size); + [[nodiscard]] static AllocationTrace realloc(Int64 old_size, Int64 new_size); /// This function should be called after memory deallocation. - static void free(Int64 size); + [[nodiscard]] static AllocationTrace free(Int64 size); static void check(); private: - static void allocImpl(Int64 size, bool throw_if_memory_exceeded); + [[nodiscard]] static AllocationTrace allocImpl(Int64 size, bool throw_if_memory_exceeded); }; diff --git a/src/Common/FiberStack.h b/src/Common/FiberStack.h index c55608311d0..91bb632d807 100644 --- a/src/Common/FiberStack.h +++ b/src/Common/FiberStack.h @@ -57,7 +57,8 @@ public: } /// Do not count guard page in memory usage. - CurrentMemoryTracker::alloc(num_pages * page_size); + auto trace = CurrentMemoryTracker::alloc(num_pages * page_size); + trace.onAlloc(vp, num_pages * page_size); boost::context::stack_context sctx; sctx.size = num_bytes; @@ -77,6 +78,7 @@ public: ::munmap(vp, sctx.size); /// Do not count guard page in memory usage. 
- CurrentMemoryTracker::free(sctx.size - page_size); + auto trace = CurrentMemoryTracker::free(sctx.size - page_size); + trace.onFree(vp, sctx.size - page_size); } }; diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 27d0adcf24f..d0d0d6b8686 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -1,6 +1,7 @@ #include "MemoryTracker.h" #include +#include #include #include #include @@ -82,6 +83,53 @@ inline std::string_view toDescription(OvercommitResult result) } } +bool shouldTrackAllocation(DB::Float64 probability, void * ptr) +{ + return sipHash64(uintptr_t(ptr)) < std::numeric_limits::max() * probability; +} + +AllocationTrace updateAllocationTrace(AllocationTrace trace, const std::optional & sample_probability) +{ + if (unlikely(sample_probability)) + return AllocationTrace(*sample_probability); + + return trace; +} + +AllocationTrace getAllocationTrace(std::optional & sample_probability) +{ + if (unlikely(sample_probability)) + return AllocationTrace(*sample_probability); + + return AllocationTrace(0); +} + +} + +AllocationTrace::AllocationTrace(double sample_probability_) : sample_probability(sample_probability_) {} + +void AllocationTrace::onAlloc(void * ptr, size_t size) const +{ + if (likely(sample_probability == 0)) + return; + + if (sample_probability < 1 && !shouldTrackAllocation(sample_probability, ptr)) + return; + + MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); + DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = Int64(size), .ptr = ptr}); +} + +void AllocationTrace::onFree(void * ptr, size_t size) const +{ + if (likely(sample_probability == 0)) + return; + + if (sample_probability < 1 && !shouldTrackAllocation(sample_probability, ptr)) + return; + + MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); + DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -Int64(size), .ptr = ptr}); } namespace ProfileEvents @@ -135,7 +183,7 @@ void MemoryTracker::logMemoryUsage(Int64 current) const } -void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker) +AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker) { if (size < 0) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Negative size ({}) is passed to MemoryTracker. It is a bug.", size); @@ -154,9 +202,14 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT /// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent. if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - loaded_next->allocImpl(size, throw_if_memory_exceeded, - level == VariableContext::Process ? this : query_tracker); - return; + { + MemoryTracker * tracker = level == VariableContext::Process ? 
this : query_tracker; + return updateAllocationTrace( + loaded_next->allocImpl(size, throw_if_memory_exceeded, tracker), + sample_probability); + } + + return getAllocationTrace(sample_probability); } /** Using memory_order_relaxed means that if allocations are done simultaneously, @@ -183,14 +236,6 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT allocation_traced = true; } - std::bernoulli_distribution sample(sample_probability); - if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) - { - MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); - DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = size}); - allocation_traced = true; - } - std::bernoulli_distribution fault(fault_probability); if (unlikely(fault_probability > 0.0 && fault(thread_local_rng))) { @@ -309,16 +354,22 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT } if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - loaded_next->allocImpl(size, throw_if_memory_exceeded, - level == VariableContext::Process ? this : query_tracker); + { + MemoryTracker * tracker = level == VariableContext::Process ? this : query_tracker; + return updateAllocationTrace( + loaded_next->allocImpl(size, throw_if_memory_exceeded, tracker), + sample_probability); + } + + return getAllocationTrace(sample_probability); } void MemoryTracker::adjustWithUntrackedMemory(Int64 untracked_memory) { if (untracked_memory > 0) - allocImpl(untracked_memory, /*throw_if_memory_exceeded*/ false); + std::ignore = allocImpl(untracked_memory, /*throw_if_memory_exceeded*/ false); else - free(-untracked_memory); + std::ignore = free(-untracked_memory); } bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage) @@ -337,8 +388,7 @@ bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage) return false; } - -void MemoryTracker::free(Int64 size) +AllocationTrace MemoryTracker::free(Int64 size) { if (MemoryTrackerBlockerInThread::isBlocked(level)) { @@ -353,15 +403,9 @@ void MemoryTracker::free(Int64 size) /// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent. 
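+        /// The returned trace uses this tracker's sample_probability when it is set
+        /// explicitly; otherwise the value reported by the parent chain propagates
+        /// (see updateAllocationTrace()).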
if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - loaded_next->free(size); - return; - } + return updateAllocationTrace(loaded_next->free(size), sample_probability); - std::bernoulli_distribution sample(sample_probability); - if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) - { - MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); - DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size}); + return getAllocationTrace(sample_probability); } Int64 accounted_size = size; @@ -389,12 +433,15 @@ void MemoryTracker::free(Int64 size) if (auto * overcommit_tracker_ptr = overcommit_tracker.load(std::memory_order_relaxed)) overcommit_tracker_ptr->tryContinueQueryExecutionAfterFree(accounted_size); + AllocationTrace res = getAllocationTrace(sample_probability); if (auto * loaded_next = parent.load(std::memory_order_relaxed)) - loaded_next->free(size); + res = updateAllocationTrace(loaded_next->free(size), sample_probability); auto metric_loaded = metric.load(std::memory_order_relaxed); if (metric_loaded != CurrentMetrics::end()) CurrentMetrics::sub(metric_loaded, accounted_size); + + return res; } @@ -478,3 +525,14 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value) while ((value == 0 || old_value < value) && !profiler_limit.compare_exchange_weak(old_value, value)) ; } + +double MemoryTracker::getSampleProbability() +{ + if (sample_probability) + return *sample_probability; + + if (auto * loaded_next = parent.load(std::memory_order_relaxed)) + return loaded_next->getSampleProbability(); + + return 0; +} diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index f6113d31423..e1f61b1585a 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -2,9 +2,11 @@ #include #include +#include #include #include #include +#include #if !defined(NDEBUG) #define MEMORY_TRACKER_DEBUG_CHECKS @@ -65,7 +67,7 @@ private: double fault_probability = 0; /// To randomly sample allocations and deallocations in trace_log. - double sample_probability = 0; + std::optional sample_probability; /// Singly-linked list. All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy). /// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker. @@ -90,8 +92,8 @@ private: /// allocImpl(...) and free(...) 
should not be used directly friend struct CurrentMemoryTracker; - void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr); - void free(Int64 size); + [[nodiscard]] AllocationTrace allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr); + [[nodiscard]] AllocationTrace free(Int64 size); public: static constexpr auto USAGE_EVENT_NAME = "MemoryTrackerUsage"; @@ -146,6 +148,8 @@ public: sample_probability = value; } + double getSampleProbability(); + void setProfilerStep(Int64 value) { profiler_step = value; diff --git a/src/Common/MemoryTrackerBlockerInThread.h b/src/Common/MemoryTrackerBlockerInThread.h index d3882056f54..73794049007 100644 --- a/src/Common/MemoryTrackerBlockerInThread.h +++ b/src/Common/MemoryTrackerBlockerInThread.h @@ -28,4 +28,5 @@ public: } friend class MemoryTracker; + friend struct AllocationTrace; }; diff --git a/src/Common/TraceSender.cpp b/src/Common/TraceSender.cpp index 64d7b2b0eaf..91d07367a82 100644 --- a/src/Common/TraceSender.cpp +++ b/src/Common/TraceSender.cpp @@ -33,6 +33,7 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext + sizeof(TraceType) /// trace type + sizeof(UInt64) /// thread_id + sizeof(Int64) /// size + + sizeof(void *) /// ptr + sizeof(ProfileEvents::Event) /// event + sizeof(ProfileEvents::Count); /// increment @@ -74,6 +75,7 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext writePODBinary(trace_type, out); writePODBinary(thread_id, out); writePODBinary(extras.size, out); + writePODBinary(UInt64(extras.ptr), out); writePODBinary(extras.event, out); writePODBinary(extras.increment, out); diff --git a/src/Common/TraceSender.h b/src/Common/TraceSender.h index 21b44b651dd..68ba15ee400 100644 --- a/src/Common/TraceSender.h +++ b/src/Common/TraceSender.h @@ -28,8 +28,9 @@ class TraceSender public: struct Extras { - /// size - for memory tracing is the amount of memory allocated; for other trace types it is 0. + /// size, ptr - for memory tracing is the amount of memory allocated; for other trace types it is 0. Int64 size{}; + void * ptr = nullptr; /// Event type and increment for 'ProfileEvent' trace type; for other trace types defaults. 
ProfileEvents::Event event{ProfileEvents::end()}; ProfileEvents::Count increment{}; diff --git a/src/Common/clickhouse_malloc.cpp b/src/Common/clickhouse_malloc.cpp index 3f69ebdf58d..afdad3c6599 100644 --- a/src/Common/clickhouse_malloc.cpp +++ b/src/Common/clickhouse_malloc.cpp @@ -9,7 +9,11 @@ extern "C" void * clickhouse_malloc(size_t size) { void * res = malloc(size); if (res) - Memory::trackMemory(size); + { + AllocationTrace trace; + size_t actual_size = Memory::trackMemory(size, trace); + trace.onAlloc(res, actual_size); + } return res; } @@ -17,17 +21,29 @@ extern "C" void * clickhouse_calloc(size_t number_of_members, size_t size) { void * res = calloc(number_of_members, size); if (res) - Memory::trackMemory(number_of_members * size); + { + AllocationTrace trace; + size_t actual_size = Memory::trackMemory(number_of_members * size, trace); + trace.onAlloc(res, actual_size); + } return res; } extern "C" void * clickhouse_realloc(void * ptr, size_t size) { if (ptr) - Memory::untrackMemory(ptr); + { + AllocationTrace trace; + size_t actual_size = Memory::untrackMemory(ptr, trace); + trace.onFree(ptr, actual_size); + } void * res = realloc(ptr, size); if (res) - Memory::trackMemory(size); + { + AllocationTrace trace; + size_t actual_size = Memory::trackMemory(size, trace); + trace.onAlloc(res, actual_size); + } return res; } @@ -42,7 +58,9 @@ extern "C" void * clickhouse_reallocarray(void * ptr, size_t number_of_members, extern "C" void clickhouse_free(void * ptr) { - Memory::untrackMemory(ptr); + AllocationTrace trace; + size_t actual_size = Memory::untrackMemory(ptr, trace); + trace.onFree(ptr, actual_size); free(ptr); } @@ -50,6 +68,10 @@ extern "C" int clickhouse_posix_memalign(void ** memptr, size_t alignment, size_ { int res = posix_memalign(memptr, alignment, size); if (res == 0) - Memory::trackMemory(size); + { + AllocationTrace trace; + size_t actual_size = Memory::trackMemory(size, trace); + trace.onAlloc(*memptr, actual_size); + } return res; } diff --git a/src/Common/memory.h b/src/Common/memory.h index 4cb1c535e56..87ccdce070a 100644 --- a/src/Common/memory.h +++ b/src/Common/memory.h @@ -112,16 +112,19 @@ inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size, TAlign... align template ... TAlign> requires DB::OptionalArgument -inline ALWAYS_INLINE void trackMemory(std::size_t size, TAlign... align) +inline ALWAYS_INLINE size_t trackMemory(std::size_t size, AllocationTrace & trace, TAlign... align) { std::size_t actual_size = getActualAllocationSize(size, align...); - CurrentMemoryTracker::allocNoThrow(actual_size); + trace = CurrentMemoryTracker::allocNoThrow(actual_size); + return actual_size; } template ... TAlign> requires DB::OptionalArgument -inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0, TAlign... align [[maybe_unused]]) noexcept +inline ALWAYS_INLINE size_t untrackMemory(void * ptr [[maybe_unused]], AllocationTrace & trace, std::size_t size [[maybe_unused]] = 0, TAlign... 
align [[maybe_unused]]) noexcept { + std::size_t actual_size = 0; + try { #if USE_JEMALLOC @@ -130,23 +133,26 @@ inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t if (likely(ptr != nullptr)) { if constexpr (sizeof...(TAlign) == 1) - CurrentMemoryTracker::free(sallocx(ptr, MALLOCX_ALIGN(alignToSizeT(align...)))); + actual_size = sallocx(ptr, MALLOCX_ALIGN(alignToSizeT(align...))); else - CurrentMemoryTracker::free(sallocx(ptr, 0)); + actual_size = sallocx(ptr, 0); } #else if (size) - CurrentMemoryTracker::free(size); + actual_size = size; # if defined(_GNU_SOURCE) /// It's innaccurate resource free for sanitizers. malloc_usable_size() result is greater or equal to allocated size. else - CurrentMemoryTracker::free(malloc_usable_size(ptr)); + actual_size = malloc_usable_size(ptr); # endif #endif + trace = CurrentMemoryTracker::free(actual_size); } catch (...) { } + + return actual_size; } } diff --git a/src/Common/new_delete.cpp b/src/Common/new_delete.cpp index 871ab750907..d0170bd820c 100644 --- a/src/Common/new_delete.cpp +++ b/src/Common/new_delete.cpp @@ -50,50 +50,74 @@ static struct InitializeJemallocZoneAllocatorForOSX void * operator new(std::size_t size) { - Memory::trackMemory(size); - return Memory::newImpl(size); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace); + void * ptr = Memory::newImpl(size); + trace.onAlloc(ptr, actual_size); + return ptr; } void * operator new(std::size_t size, std::align_val_t align) { - Memory::trackMemory(size, align); - return Memory::newImpl(size, align); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace, align); + void * ptr = Memory::newImpl(size, align); + trace.onAlloc(ptr, actual_size); + return ptr; } void * operator new[](std::size_t size) { - Memory::trackMemory(size); - return Memory::newImpl(size); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace); + void * ptr = Memory::newImpl(size); + trace.onAlloc(ptr, actual_size); + return ptr; } void * operator new[](std::size_t size, std::align_val_t align) { - Memory::trackMemory(size, align); - return Memory::newImpl(size, align); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace, align); + void * ptr = Memory::newImpl(size, align); + trace.onAlloc(ptr, actual_size); + return ptr; } void * operator new(std::size_t size, const std::nothrow_t &) noexcept { - Memory::trackMemory(size); - return Memory::newNoExept(size); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace); + void * ptr = Memory::newNoExept(size); + trace.onAlloc(ptr, actual_size); + return ptr; } void * operator new[](std::size_t size, const std::nothrow_t &) noexcept { - Memory::trackMemory(size); - return Memory::newNoExept(size); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace); + void * ptr = Memory::newNoExept(size); + trace.onAlloc(ptr, actual_size); + return ptr; } void * operator new(std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept { - Memory::trackMemory(size, align); - return Memory::newNoExept(size, align); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace, align); + void * ptr = Memory::newNoExept(size, align); + trace.onAlloc(ptr, actual_size); + return ptr; } void * operator new[](std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept { - Memory::trackMemory(size, align); - return 
Memory::newNoExept(size, align); + AllocationTrace trace; + std::size_t actual_size = Memory::trackMemory(size, trace, align); + void * ptr = Memory::newNoExept(size, align); + trace.onAlloc(ptr, actual_size); + return ptr; } /// delete @@ -109,48 +133,64 @@ void * operator new[](std::size_t size, std::align_val_t align, const std::nothr void operator delete(void * ptr) noexcept { - Memory::untrackMemory(ptr); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace); + trace.onFree(ptr, actual_size); Memory::deleteImpl(ptr); } void operator delete(void * ptr, std::align_val_t align) noexcept { - Memory::untrackMemory(ptr, 0, align); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace, 0, align); + trace.onFree(ptr, actual_size); Memory::deleteImpl(ptr); } void operator delete[](void * ptr) noexcept { - Memory::untrackMemory(ptr); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace); + trace.onFree(ptr, actual_size); Memory::deleteImpl(ptr); } void operator delete[](void * ptr, std::align_val_t align) noexcept { - Memory::untrackMemory(ptr, 0, align); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace, 0, align); + trace.onFree(ptr, actual_size); Memory::deleteImpl(ptr); } void operator delete(void * ptr, std::size_t size) noexcept { - Memory::untrackMemory(ptr, size); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace, size); + trace.onFree(ptr, actual_size); Memory::deleteSized(ptr, size); } void operator delete(void * ptr, std::size_t size, std::align_val_t align) noexcept { - Memory::untrackMemory(ptr, size, align); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace, size, align); + trace.onFree(ptr, actual_size); Memory::deleteSized(ptr, size, align); } void operator delete[](void * ptr, std::size_t size) noexcept { - Memory::untrackMemory(ptr, size); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace, size); + trace.onFree(ptr, actual_size); Memory::deleteSized(ptr, size); } void operator delete[](void * ptr, std::size_t size, std::align_val_t align) noexcept { - Memory::untrackMemory(ptr, size, align); + AllocationTrace trace; + std::size_t actual_size = Memory::untrackMemory(ptr, trace, size, align); + trace.onFree(ptr, actual_size); Memory::deleteSized(ptr, size, align); } diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 8a57c4bc9a1..7f494a694bd 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -525,7 +525,7 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB } /// Check if we have enough data in buffer to check if it's a null. 
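+    /// With a multi-character custom delimiter we cannot decide by peeking a single
+    /// character after the null representation, so fall back to the slower
+    /// checkpoint-based path below.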
- if (istr.available() > null_representation.size()) + if (settings.csv.custom_delimiter.empty() && istr.available() > null_representation.size()) { auto check_for_null = [&istr, &null_representation, &settings]() { @@ -550,8 +550,21 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB { buf.setCheckpoint(); SCOPE_EXIT(buf.dropCheckpoint()); - if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\r' || *buf.position() == '\n')) - return true; + if (checkString(null_representation, buf)) + { + if (!settings.csv.custom_delimiter.empty()) + { + if (checkString(settings.csv.custom_delimiter, buf)) + { + /// Rollback to the beginning of custom delimiter. + buf.rollbackToCheckpoint(); + assertString(null_representation, buf); + return true; + } + } + else if (buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\r' || *buf.position() == '\n') + return true; + } buf.rollbackToCheckpoint(); return false; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 98f9e486141..963213f31ad 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -119,6 +119,7 @@ struct FormatSettings char tuple_delimiter = ','; bool use_best_effort_in_schema_inference = true; UInt64 skip_first_lines = 0; + String custom_delimiter; } csv; struct HiveText diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 7e52c55e5b0..f66a193dea2 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -16,6 +16,7 @@ #include #include +#include #include @@ -94,7 +95,7 @@ void convertAnyColumnToBool(const IColumn * column, UInt8Container & res) } -template +template bool extractConstColumns(ColumnRawPtrs & in, UInt8 & res, Func && func) { bool has_res = false; @@ -112,7 +113,10 @@ bool extractConstColumns(ColumnRawPtrs & in, UInt8 & res, Func && func) if (has_res) { - res = Op::apply(res, x); + if constexpr (IsTernary) + res = Op::ternaryApply(res, x); + else + res = Op::apply(res, x); } else { @@ -129,7 +133,7 @@ bool extractConstColumns(ColumnRawPtrs & in, UInt8 & res, Func && func) template inline bool extractConstColumnsAsBool(ColumnRawPtrs & in, UInt8 & res) { - return extractConstColumns( + return extractConstColumns( in, res, [](const Field & value) { @@ -141,7 +145,7 @@ inline bool extractConstColumnsAsBool(ColumnRawPtrs & in, UInt8 & res) template inline bool extractConstColumnsAsTernary(ColumnRawPtrs & in, UInt8 & res_3v) { - return extractConstColumns( + return extractConstColumns( in, res_3v, [](const Field & value) { @@ -192,47 +196,74 @@ private: }; -/// A helper class used by AssociativeGenericApplierImpl -/// Allows for on-the-fly conversion of any data type into intermediate ternary representation -using TernaryValueGetter = std::function; - template -struct ValueGetterBuilderImpl; +struct TernaryValueBuilderImpl; template -struct ValueGetterBuilderImpl +struct TernaryValueBuilderImpl { - static TernaryValueGetter build(const IColumn * x) + static void build(const IColumn * x, UInt8* __restrict ternary_column_data) { + size_t size = x->size(); if (x->onlyNull()) { - return [](size_t){ return Ternary::Null; }; + memset(ternary_column_data, Ternary::Null, size); } else if (const auto * nullable_column = typeid_cast(x)) { if (const auto * nested_column = typeid_cast *>(nullable_column->getNestedColumnPtr().get())) { - return [ - &null_data = nullable_column->getNullMapData(), - 
&column_data = nested_column->getData()](size_t i) + const auto& null_data = nullable_column->getNullMapData(); + const auto& column_data = nested_column->getData(); + + if constexpr (sizeof(Type) == 1) { - return Ternary::makeValue(column_data[i], null_data[i]); - }; + for (size_t i = 0; i < size; ++i) + { + auto has_value = static_cast(column_data[i] != 0); + auto is_null = !!null_data[i]; + + ternary_column_data[i] = ((has_value << 1) | is_null) & (1 << !is_null); + } + } + else + { + for (size_t i = 0; i < size; ++i) + { + auto has_value = static_cast(column_data[i] != 0); + ternary_column_data[i] = has_value; + } + + for (size_t i = 0; i < size; ++i) + { + auto has_value = ternary_column_data[i]; + auto is_null = !!null_data[i]; + + ternary_column_data[i] = ((has_value << 1) | is_null) & (1 << !is_null); + } + } } else - return ValueGetterBuilderImpl::build(x); + TernaryValueBuilderImpl::build(x, ternary_column_data); } else if (const auto column = typeid_cast *>(x)) - return [&column_data = column->getData()](size_t i) { return Ternary::makeValue(column_data[i]); }; + { + auto &column_data = column->getData(); + + for (size_t i = 0; i < size; ++i) + { + ternary_column_data[i] = (column_data[i] != 0) << 1; + } + } else - return ValueGetterBuilderImpl::build(x); + TernaryValueBuilderImpl::build(x, ternary_column_data); } }; template <> -struct ValueGetterBuilderImpl<> +struct TernaryValueBuilderImpl<> { - static TernaryValueGetter build(const IColumn * x) + [[noreturn]] static void build(const IColumn * x, UInt8 * /* nullable_ternary_column_data */) { throw Exception( std::string("Unknown numeric column of type: ") + demangle(typeid(*x).name()), @@ -240,12 +271,12 @@ struct ValueGetterBuilderImpl<> } }; -using ValueGetterBuilder = - ValueGetterBuilderImpl; +using TernaryValueBuilder = + TernaryValueBuilderImpl; -/// This class together with helper class ValueGetterBuilder can be used with columns of arbitrary data type -/// Allows for on-the-fly conversion of any type of data into intermediate ternary representation -/// and eliminates the need to materialize data columns in intermediate representation +/// This class together with helper class TernaryValueBuilder can be used with columns of arbitrary data type +/// Converts column of any data type into an intermediate UInt8Column of ternary representation for the +/// vectorized ternary logic evaluation. template class AssociativeGenericApplierImpl { @@ -254,20 +285,19 @@ class AssociativeGenericApplierImpl public: /// Remembers the last N columns from `in`. explicit AssociativeGenericApplierImpl(const ColumnRawPtrs & in) - : val_getter{ValueGetterBuilder::build(in[in.size() - N])}, next{in} {} + : vec(in[in.size() - N]->size()), next{in} + { + TernaryValueBuilder::build(in[in.size() - N], vec.data()); + } /// Returns a combination of values in the i-th row of all columns stored in the constructor. inline ResultValueType apply(const size_t i) const { - const auto a = val_getter(i); - if constexpr (Op::isSaturable()) - return Op::isSaturatedValueTernary(a) ? a : Op::apply(a, next.apply(i)); - else - return Op::apply(a, next.apply(i)); + return Op::ternaryApply(vec[i], next.apply(i)); } private: - const TernaryValueGetter val_getter; + UInt8Container vec; const AssociativeGenericApplierImpl next; }; @@ -280,12 +310,15 @@ class AssociativeGenericApplierImpl public: /// Remembers the last N columns from `in`. 
explicit AssociativeGenericApplierImpl(const ColumnRawPtrs & in) - : val_getter{ValueGetterBuilder::build(in[in.size() - 1])} {} + : vec(UInt8Container(in[in.size() - 1]->size())) + { + TernaryValueBuilder::build(in[in.size() - 1], vec.data()); + } - inline ResultValueType apply(const size_t i) const { return val_getter(i); } + inline ResultValueType apply(const size_t i) const { return vec[i]; } private: - const TernaryValueGetter val_getter; + UInt8Container vec; }; @@ -318,7 +351,12 @@ struct OperationApplier for (size_t i = 0; i < size; ++i) { if constexpr (CarryResult) - result_data[i] = Op::apply(result_data[i], operation_applier_impl.apply(i)); + { + if constexpr (std::is_same_v, AssociativeApplierImpl>) + result_data[i] = Op::apply(result_data[i], operation_applier_impl.apply(i)); + else + result_data[i] = Op::ternaryApply(result_data[i], operation_applier_impl.apply(i)); + } else result_data[i] = operation_applier_impl.apply(i); } diff --git a/src/Functions/FunctionsLogical.h b/src/Functions/FunctionsLogical.h index 22471a151d2..30d8983b8cc 100644 --- a/src/Functions/FunctionsLogical.h +++ b/src/Functions/FunctionsLogical.h @@ -44,21 +44,29 @@ namespace Ternary { using ResultType = UInt8; - /** These carefully picked values magically work so bitwise "and", "or" on them - * corresponds to the expected results in three-valued logic. + /** These values are carefully picked so that they could be efficiently evaluated with bitwise operations, which + * are feasible for auto-vectorization by the compiler. The expression for the ternary value evaluation writes: * - * False and True are represented by all-0 and all-1 bits, so all bitwise operations on them work as expected. - * Null is represented as single 1 bit. So, it is something in between False and True. - * And "or" works like maximum and "and" works like minimum: - * "or" keeps True as is and lifts False with Null to Null. - * "and" keeps False as is and downs True with Null to Null. + * ternary_value = ((value << 1) | is_null) & (1 << !is_null) + * + * The truth table of the above formula lists: + * +---------------+--------------+-------------+ + * | is_null\value | 0 | 1 | + * +---------------+--------------+-------------+ + * | 0 | 0b00 (False) | 0b10 (True) | + * | 1 | 0b01 (Null) | 0b01 (Null) | + * +---------------+--------------+-------------+ + * + * As the numerical values of False, Null and True are assigned in ascending order, the "and" and "or" of + * ternary logic could be implemented with minimum and maximum respectively, which are also vectorizable. + * https://en.wikipedia.org/wiki/Three-valued_logic * * This logic does not apply for "not" and "xor" - they work with default implementation for NULLs: * anything with NULL returns NULL, otherwise use conventional two-valued logic. */ - static constexpr UInt8 False = 0; /// All zero bits. - static constexpr UInt8 True = -1; /// All one bits. - static constexpr UInt8 Null = 1; /// Single one bit. + static constexpr UInt8 False = 0; /// 0b00 + static constexpr UInt8 Null = 1; /// 0b01 + static constexpr UInt8 True = 2; /// 0b10 template inline ResultType makeValue(T value) @@ -90,6 +98,8 @@ struct AndImpl static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a & b; } + static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::min(a, b); } + /// Will use three-valued logic for NULLs (see above) or default implementation (any operation with NULL returns NULL). 
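+    /// E.g. for AndImpl: ternaryApply(Ternary::True /* 0b10 */, Ternary::Null /* 0b01 */)
+    /// = std::min(2, 1) = Ternary::Null, matching the truth table above.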
@@ -90,6 +98,8 @@ struct AndImpl

    static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a & b; }

+    static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::min(a, b); }
+
    /// Will use three-valued logic for NULLs (see above) or default implementation (any operation with NULL returns NULL).
    static inline constexpr bool specialImplementationForNulls() { return true; }
};

@@ -102,6 +112,7 @@ struct OrImpl
    static inline constexpr bool isSaturatedValue(bool a) { return a; }
    static inline constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::True; }
    static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a | b; }
+    static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::max(a, b); }
    static inline constexpr bool specialImplementationForNulls() { return true; }
};

@@ -113,6 +124,7 @@ struct XorImpl
    static inline constexpr bool isSaturatedValue(bool) { return false; }
    static inline constexpr bool isSaturatedValueTernary(UInt8) { return false; }
    static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a != b; }
+    static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return a != b; }
    static inline constexpr bool specialImplementationForNulls() { return false; }

#if USE_EMBEDDED_COMPILER
diff --git a/src/Functions/tests/gtest_ternary_logic.cpp b/src/Functions/tests/gtest_ternary_logic.cpp
new file mode 100644
index 00000000000..5ecafabb361
--- /dev/null
+++ b/src/Functions/tests/gtest_ternary_logic.cpp
@@ -0,0 +1,354 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// I know that inclusion of .cpp is not good at all
+#include <Functions/FunctionsLogical.cpp> // NOLINT
+
+using namespace DB;
+using TernaryValues = std::vector<Ternary::ResultType>;
+
+struct LinearCongruentialGenerator
+{
+    /// Constants from `man lrand48_r`.
+    static constexpr UInt64 a = 0x5DEECE66D;
+    static constexpr UInt64 c = 0xB;
+
+    /// And this is from `head -c8 /dev/urandom | xxd -p`
+    UInt64 current = 0x09826f4a081cee35ULL;
+
+    UInt32 next()
+    {
+        current = current * a + c;
+        return static_cast<UInt32>(current >> 16);
+    }
+};
+
+void generateRandomTernaryValue(LinearCongruentialGenerator & gen, Ternary::ResultType * output, size_t size, double false_ratio, double null_ratio)
+{
+    /// The LinearCongruentialGenerator generates nonnegative integers uniformly distributed over the interval [0, 2^32).
+    /// See https://linux.die.net/man/3/nrand48
+
+    double false_percentile = false_ratio;
+    double null_percentile = false_ratio + null_ratio;
+
+    false_percentile = false_percentile > 1 ? 1 : false_percentile;
+    null_percentile = null_percentile > 1 ? 1 : null_percentile;
+
+    UInt32 false_threshold = static_cast<UInt32>(static_cast<double>(std::numeric_limits<UInt32>::max()) * false_percentile);
+    UInt32 null_threshold = static_cast<UInt32>(static_cast<double>(std::numeric_limits<UInt32>::max()) * null_percentile);
+
+    for (Ternary::ResultType * end = output + size; output != end; ++output)
+    {
+        UInt32 val = gen.next();
+        *output = val < false_threshold ? Ternary::False : (val < null_threshold ?
Ternary::Null : Ternary::True); + } +} + +template +ColumnPtr createColumnNullable(const Ternary::ResultType * ternary_values, size_t size) +{ + auto nested_column = ColumnVector::create(size); + auto null_map = ColumnUInt8::create(size); + auto & nested_column_data = nested_column->getData(); + auto & null_map_data = null_map->getData(); + + for (size_t i = 0; i < size; ++i) + { + if (ternary_values[i] == Ternary::Null) + { + null_map_data[i] = 1; + nested_column_data[i] = 0; + } + else if (ternary_values[i] == Ternary::True) + { + null_map_data[i] = 0; + nested_column_data[i] = 100; + } + else + { + null_map_data[i] = 0; + nested_column_data[i] = 0; + } + } + + return ColumnNullable::create(std::move(nested_column), std::move(null_map)); +} + +template +ColumnPtr createColumnVector(const Ternary::ResultType * ternary_values, size_t size) +{ + auto column = ColumnVector::create(size); + auto & column_data = column->getData(); + + for (size_t i = 0; i < size; ++i) + { + if (ternary_values[i] == Ternary::True) + { + column_data[i] = 100; + } + else + { + column_data[i] = 0; + } + } + + return column; +} + +template +ColumnPtr createRandomColumn(LinearCongruentialGenerator & gen, TernaryValues & ternary_values) +{ + size_t size = ternary_values.size(); + Ternary::ResultType * ternary_data = ternary_values.data(); + + if constexpr (std::is_same_v) + { + generateRandomTernaryValue(gen, ternary_data, size, 0.3, 0.7); + return createColumnNullable(ternary_data, size); + } + else if constexpr (std::is_same_v>) + { + generateRandomTernaryValue(gen, ternary_data, size, 0.5, 0); + return createColumnVector(ternary_data, size); + } + else + { + auto nested_col = ColumnNothing::create(size); + auto null_map = ColumnUInt8::create(size); + + memset(ternary_data, Ternary::Null, size); + + return ColumnNullable::create(std::move(nested_col), std::move(null_map)); + } +} + +/* The truth table of ternary And and Or operations: + * +-------+-------+---------+--------+ + * | a | b | a And b | a Or b | + * +-------+-------+---------+--------+ + * | False | False | False | False | + * | False | Null | False | Null | + * | False | True | False | True | + * | Null | False | False | Null | + * | Null | Null | Null | Null | + * | Null | True | Null | True | + * | True | False | False | True | + * | True | Null | Null | True | + * | True | True | True | True | + * +-------+-------+---------+--------+ + * + * https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic + */ +template +bool testTernaryLogicTruthTable() +{ + constexpr size_t size = 9; + + Ternary::ResultType col_a_ternary[] = {Ternary::False, Ternary::False, Ternary::False, Ternary::Null, Ternary::Null, Ternary::Null, Ternary::True, Ternary::True, Ternary::True}; + Ternary::ResultType col_b_ternary[] = {Ternary::False, Ternary::Null, Ternary::True, Ternary::False, Ternary::Null, Ternary::True,Ternary::False, Ternary::Null, Ternary::True}; + Ternary::ResultType and_expected_ternary[] = {Ternary::False, Ternary::False, Ternary::False, Ternary::False, Ternary::Null, Ternary::Null,Ternary::False, Ternary::Null, Ternary::True}; + Ternary::ResultType or_expected_ternary[] = {Ternary::False, Ternary::Null, Ternary::True, Ternary::Null, Ternary::Null, Ternary::True,Ternary::True, Ternary::True, Ternary::True}; + Ternary::ResultType * expected_ternary; + + + if constexpr (std::is_same_v) + { + expected_ternary = and_expected_ternary; + } + else + { + expected_ternary = or_expected_ternary; + } + + auto col_a = 
createColumnNullable(col_a_ternary, size); + auto col_b = createColumnNullable(col_b_ternary, size); + ColumnRawPtrs arguments = {col_a.get(), col_b.get()}; + + auto col_res = ColumnUInt8::create(size); + auto & col_res_data = col_res->getData(); + + OperationApplier::apply(arguments, col_res->getData(), false); + + for (size_t i = 0; i < size; ++i) + { + if (col_res_data[i] != expected_ternary[i]) return false; + } + + return true; +} + +template +bool testTernaryLogicOfTwoColumns(size_t size) +{ + LinearCongruentialGenerator gen; + + TernaryValues left_column_ternary(size); + TernaryValues right_column_ternary(size); + TernaryValues expected_ternary(size); + + ColumnPtr left = createRandomColumn(gen, left_column_ternary); + ColumnPtr right = createRandomColumn(gen, right_column_ternary); + + for (size_t i = 0; i < size; ++i) + { + /// Given that False is less than Null and Null is less than True, the And operation can be implemented + /// with std::min, and the Or operation can be implemented with std::max. + if constexpr (std::is_same_v) + { + expected_ternary[i] = std::min(left_column_ternary[i], right_column_ternary[i]); + } + else + { + expected_ternary[i] = std::max(left_column_ternary[i], right_column_ternary[i]); + } + } + + ColumnRawPtrs arguments = {left.get(), right.get()}; + + auto col_res = ColumnUInt8::create(size); + auto & col_res_data = col_res->getData(); + + OperationApplier::apply(arguments, col_res->getData(), false); + + for (size_t i = 0; i < size; ++i) + { + if (col_res_data[i] != expected_ternary[i]) return false; + } + + return true; +} + +TEST(TernaryLogicTruthTable, NestedUInt8) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedUInt16) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedUInt32) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedUInt64) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedInt8) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedInt16) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedInt32) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedInt64) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedFloat32) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + ASSERT_EQ(test_2, true); +} + +TEST(TernaryLogicTruthTable, NestedFloat64) +{ + bool test_1 = testTernaryLogicTruthTable(); + bool test_2 = testTernaryLogicTruthTable(); + ASSERT_EQ(test_1, true); + 
ASSERT_EQ(test_2, true);
+}
+
+TEST(TernaryLogicTwoColumns, TwoNullable)
+{
+    bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/);
+    bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/);
+    ASSERT_EQ(test_1, true);
+    ASSERT_EQ(test_2, true);
+}
+
+TEST(TernaryLogicTwoColumns, TwoVector)
+{
+    bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/);
+    bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/);
+    ASSERT_EQ(test_1, true);
+    ASSERT_EQ(test_2, true);
+}
+
+TEST(TernaryLogicTwoColumns, TwoNothing)
+{
+    bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/);
+    bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/);
+    ASSERT_EQ(test_1, true);
+    ASSERT_EQ(test_2, true);
+}
+
+TEST(TernaryLogicTwoColumns, NullableVector)
+{
+    bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/);
+    bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/);
+    ASSERT_EQ(test_1, true);
+    ASSERT_EQ(test_2, true);
+}
+
+TEST(TernaryLogicTwoColumns, NullableNothing)
+{
+    bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/);
+    bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/);
+    ASSERT_EQ(test_1, true);
+    ASSERT_EQ(test_2, true);
+}
+
+TEST(TernaryLogicTwoColumns, VectorNothing)
+{
+    bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/);
+    bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/);
+    ASSERT_EQ(test_1, true);
+    ASSERT_EQ(test_2, true);
+}
diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp
index 127912a0b2a..8de1b85c8b9 100644
--- a/src/IO/ReadHelpers.cpp
+++ b/src/IO/ReadHelpers.cpp
@@ -35,6 +35,8 @@ namespace ErrorCodes
    extern const int CANNOT_PARSE_DATE;
    extern const int INCORRECT_DATA;
    extern const int ATTEMPT_TO_READ_AFTER_EOF;
+    extern const int LOGICAL_ERROR;
+    extern const int BAD_ARGUMENTS;
}

template
@@ -642,9 +644,10 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV &
    const char delimiter = settings.delimiter;
    const char maybe_quote = *buf.position();
+    const String & custom_delimiter = settings.custom_delimiter;

    /// Emptiness and not even in quotation marks.
-    if (maybe_quote == delimiter)
+    if (custom_delimiter.empty() && maybe_quote == delimiter)
        return;

    if ((settings.allow_single_quotes && maybe_quote == '\'') || (settings.allow_double_quotes && maybe_quote == '"'))
@@ -682,6 +685,42 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV &
    }
    else
    {
+        /// If custom_delimiter is specified, we should read until the first occurrence of
+        /// custom_delimiter in the buffer.
+        if (!custom_delimiter.empty())
+        {
+            PeekableReadBuffer * peekable_buf = dynamic_cast<PeekableReadBuffer *>(&buf);
+            if (!peekable_buf)
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Reading CSV string with custom delimiter is allowed only when using PeekableReadBuffer");
+
+            while (true)
+            {
+                if (peekable_buf->eof())
+                    throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading CSV string, expected custom delimiter \"{}\"", custom_delimiter);
+
+                char * next_pos = reinterpret_cast<char *>(memchr(peekable_buf->position(), custom_delimiter[0], peekable_buf->available()));
+                if (!next_pos)
+                    next_pos = peekable_buf->buffer().end();
+
+                appendToStringOrVector(s, *peekable_buf, next_pos);
+                peekable_buf->position() = next_pos;
+
+                if (!buf.hasPendingData())
+                    continue;
+
+                {
+                    PeekableReadBufferCheckpoint checkpoint{*peekable_buf, true};
+                    if (checkString(custom_delimiter, *peekable_buf))
+                        return;
+                }
+
+                s.push_back(*peekable_buf->position());
+                ++peekable_buf->position();
+            }
+
+            return;
+        }
+
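The loop just added illustrates the general pattern for matching a multi-character delimiter in a byte stream: jump with memchr to the next byte that could start the delimiter, try to match the delimiter in full, and on failure treat that byte as ordinary data. A buffer-free sketch of the same idea (simplified for illustration; the real code needs PeekableReadBuffer checkpoints because a partial match can straddle a buffer refill):

```cpp
#include <cassert>
#include <cstring>
#include <string>
#include <string_view>

// Simplified sketch of the custom-delimiter scan: the real implementation
// works on a PeekableReadBuffer and can roll back a partially matched
// delimiter across buffer refills.
static std::string readUntilDelimiter(std::string_view in, std::string_view delimiter)
{
    std::string field;
    size_t pos = 0;
    while (pos < in.size())
    {
        // Jump to the next byte that could start the delimiter.
        const void * hit = memchr(in.data() + pos, delimiter[0], in.size() - pos);
        if (!hit)
        {
            field.append(in.substr(pos));
            break;
        }
        size_t hit_pos = static_cast<size_t>(static_cast<const char *>(hit) - in.data());
        field.append(in.substr(pos, hit_pos - pos));
        // Try to match the delimiter in full; on failure consume one byte and go on.
        if (in.substr(hit_pos).starts_with(delimiter))
            return field;
        field.push_back(in[hit_pos]);
        pos = hit_pos + 1;
    }
    return field;
}

int main()
{
    // A lone '|' looks like the start of "||" but is data, not a delimiter.
    assert(readUntilDelimiter("abc|d||rest", "||") == "abc|d");
    assert(readUntilDelimiter("no delimiter here", "||") == "no delimiter here");
}
```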
        /// Unquoted case. Look for delimiter or \r or \n.
        while (!buf.eof())
        {
@@ -776,6 +815,72 @@ void readCSVField(String & s, ReadBuffer & buf, const FormatSettings::CSV & sett
    s.push_back(quote);
}

+void readCSVWithTwoPossibleDelimitersImpl(String & s, PeekableReadBuffer & buf, const String & first_delimiter, const String & second_delimiter)
+{
+    /// Check that delimiters are not empty.
+    if (first_delimiter.empty() || second_delimiter.empty())
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read CSV field with two possible delimiters, one of delimiters '{}' and '{}' is empty", first_delimiter, second_delimiter);
+
+    /// Read all data until first_delimiter or second_delimiter.
+    while (true)
+    {
+        if (buf.eof())
+            throw Exception(ErrorCodes::INCORRECT_DATA, R"(Unexpected EOF while reading CSV string, expected one of delimiters "{}" or "{}")", first_delimiter, second_delimiter);
+
+        char * next_pos = buf.position();
+        while (next_pos != buf.buffer().end() && *next_pos != first_delimiter[0] && *next_pos != second_delimiter[0])
+            ++next_pos;
+
+        appendToStringOrVector(s, buf, next_pos);
+        buf.position() = next_pos;
+        if (!buf.hasPendingData())
+            continue;
+
+        if (*buf.position() == first_delimiter[0])
+        {
+            PeekableReadBufferCheckpoint checkpoint(buf, true);
+            if (checkString(first_delimiter, buf))
+                return;
+        }
+
+        if (*buf.position() == second_delimiter[0])
+        {
+            PeekableReadBufferCheckpoint checkpoint(buf, true);
+            if (checkString(second_delimiter, buf))
+                return;
+        }
+
+        s.push_back(*buf.position());
+        ++buf.position();
+    }
+}
+
+String readCSVStringWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const FormatSettings::CSV & settings, const String & first_delimiter, const String & second_delimiter)
+{
+    String res;
+
+    /// If the value is quoted, use regular CSV reading since we need to read only the data inside the quotes.
+    if (!buf.eof() && ((settings.allow_single_quotes && *buf.position() == '\'') || (settings.allow_double_quotes && *buf.position() == '"')))
+        readCSVStringInto(res, buf, settings);
+    else
+        readCSVWithTwoPossibleDelimitersImpl(res, buf, first_delimiter, second_delimiter);
+
+    return res;
+}
+
+String readCSVFieldWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const FormatSettings::CSV & settings, const String & first_delimiter, const String & second_delimiter)
+{
+    String res;
+
+    /// If the value is quoted, use regular CSV reading since we need to read only the data inside the quotes.
+    if (!buf.eof() && ((settings.allow_single_quotes && *buf.position() == '\'') || (settings.allow_double_quotes && *buf.position() == '"')))
+        readCSVField(res, buf, settings);
+    else
+        readCSVWithTwoPossibleDelimitersImpl(res, buf, first_delimiter, second_delimiter);
+
+    return res;
+}
+
template void readCSVStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
template void readCSVStringInto<NullOutput>(NullOutput & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h
index f7d5ca9b00c..4225c01bbd4 100644
--- a/src/IO/ReadHelpers.h
+++ b/src/IO/ReadHelpers.h
@@ -558,9 +558,10 @@ void readStringUntilWhitespace(String & s, ReadBuffer & buf);
  * - string could be placed in quotes; quotes could be single: ' if FormatSettings::CSV::allow_single_quotes is true
  *   or double: " if FormatSettings::CSV::allow_double_quotes is true;
  * - or string could be unquoted - this is determined by first character;
- * - if string is unquoted, then it is read until next delimiter,
- *   either until end of line (CR or LF),
- *   or until end of stream;
+ * - if string is unquoted, then:
+ *   - if settings.custom_delimiter is not specified, it is read until the next settings.delimiter, until end of line (CR or LF), or until end of stream;
+ *   - if settings.custom_delimiter is specified, it is read until the first occurrence of settings.custom_delimiter in the buffer.
+ *     This works only if the provided buffer is a PeekableReadBuffer.
  *   but spaces and tabs at begin and end of unquoted string are consumed but ignored (note that this behaviour differs from RFC).
  * - if string is in quotes, then it will be read until closing quote,
  *   but sequences of two consecutive quotes are parsed as single quote inside string;
@@ -570,6 +571,13 @@ void readCSVString(String & s, ReadBuffer & buf, const FormatSettings::CSV & set
/// Differ from readCSVString in that it doesn't remove quotes around field if any.
void readCSVField(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings);

+/// Read string in CSV format until the first occurrence of first_delimiter or second_delimiter.
+/// Similar to readCSVString: if the string is quoted, we read only the data inside the quotes.
+String readCSVStringWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const FormatSettings::CSV & settings, const String & first_delimiter, const String & second_delimiter);
+
+/// Same as above, but includes the quotes in the result if there are any.
+String readCSVFieldWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const FormatSettings::CSV & settings, const String & first_delimiter, const String & second_delimiter);
+
/// Read and append result to array of characters.
template void readStringInto(Vector & s, ReadBuffer & buf); diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 72fa1b3c324..db95b161a4f 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1107,8 +1107,10 @@ void FileCache::reduceSizeToDownloaded( file_segment->getInfoForLogUnlocked(segment_lock)); } + CreateFileSegmentSettings create_settings{ .is_persistent = file_segment->is_persistent }; + cell->file_segment = std::make_shared( - offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED, CreateFileSegmentSettings{}); + offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED, create_settings); assert(file_segment->reserved_size == downloaded_size); } diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 418bcee05d9..177c6aecf7c 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -56,6 +56,7 @@ FileSegment::FileSegment( { reserved_size = downloaded_size = size_; is_downloaded = true; + chassert(std::filesystem::file_size(getPathInLocalCache()) == size_); break; } case (State::SKIP_CACHE): @@ -331,6 +332,8 @@ void FileSegment::write(const char * from, size_t size, size_t offset) cache_writer->next(); downloaded_size += size; + + chassert(std::filesystem::file_size(getPathInLocalCache()) == downloaded_size); } catch (Exception & e) { @@ -345,9 +348,7 @@ void FileSegment::write(const char * from, size_t size, size_t offset) throw; } -#ifndef NDEBUG chassert(getFirstNonDownloadedOffset() == offset + size); -#endif } FileSegment::State FileSegment::wait() @@ -545,6 +546,13 @@ void FileSegment::completeBasedOnCurrentState(std::lock_guard & cach resetDownloaderUnlocked(segment_lock); } + if (cache_writer && (is_downloader || is_last_holder)) + { + cache_writer->finalize(); + cache_writer.reset(); + remote_file_reader.reset(); + } + switch (download_state) { case State::SKIP_CACHE: @@ -557,8 +565,9 @@ void FileSegment::completeBasedOnCurrentState(std::lock_guard & cach case State::DOWNLOADED: { chassert(getDownloadedSizeUnlocked(segment_lock) == range().size()); - assert(is_downloaded); - assert(!cache_writer); + chassert(getDownloadedSizeUnlocked(segment_lock) == std::filesystem::file_size(getPathInLocalCache())); + chassert(is_downloaded); + chassert(!cache_writer); break; } case State::DOWNLOADING: diff --git a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp index cda91cd4ba1..866d38f3aa5 100644 --- a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp index c7397d3d64c..ccbbac71279 100644 --- a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp index cb237287dc3..2205d6cff88 100644 --- a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB 
diff --git a/src/Interpreters/TraceCollector.cpp b/src/Interpreters/TraceCollector.cpp index 050dea02717..367249f1289 100644 --- a/src/Interpreters/TraceCollector.cpp +++ b/src/Interpreters/TraceCollector.cpp @@ -97,6 +97,9 @@ void TraceCollector::run() Int64 size; readPODBinary(size, in); + UInt64 ptr; + readPODBinary(ptr, in); + ProfileEvents::Event event; readPODBinary(event, in); @@ -112,7 +115,7 @@ void TraceCollector::run() UInt64 time = static_cast(ts.tv_sec * 1000000000LL + ts.tv_nsec); UInt64 time_in_microseconds = static_cast((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000)); - TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, event, increment}; + TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, ptr, event, increment}; trace_log->add(element); } } diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index 0408ebe504b..cd5f965a679 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -38,6 +38,7 @@ NamesAndTypesList TraceLogElement::getNamesAndTypes() {"query_id", std::make_shared()}, {"trace", std::make_shared(std::make_shared())}, {"size", std::make_shared()}, + {"ptr", std::make_shared()}, {"event", std::make_shared(std::make_shared())}, {"increment", std::make_shared()}, }; @@ -57,6 +58,7 @@ void TraceLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(query_id.data(), query_id.size()); columns[i++]->insert(trace); columns[i++]->insert(size); + columns[i++]->insert(ptr); String event_name; if (event != ProfileEvents::end()) diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index c481f033a72..71aec0b50c4 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -27,8 +27,10 @@ struct TraceLogElement UInt64 thread_id{}; String query_id{}; Array trace{}; - /// Allocation size in bytes for TraceType::Memory. + /// Allocation size in bytes for TraceType::Memory and TraceType::MemorySample. Int64 size{}; + /// Allocation ptr for TraceType::MemorySample. + UInt64 ptr{}; /// ProfileEvent for TraceType::ProfileEvent. ProfileEvents::Event event{ProfileEvents::end()}; /// Increment of profile event for TraceType::ProfileEvent. 
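The new `ptr` field presumably exists so that MemorySample rows can be correlated per allocation: an allocation and its matching deallocation (sampled with a negative `size`, by assumption here) share the same pointer, so rows that do not cancel out are the still-live allocations. A toy sketch of that pairing logic (illustrative, not ClickHouse code):

```cpp
#include <cassert>
#include <cstdint>
#include <unordered_map>
#include <vector>

// Toy sketch: pair MemorySample rows by pointer. An allocation carries a
// positive size, its deallocation (by assumption) a negative size, so entries
// that sum to zero cancel out and only live allocations remain.
struct SampleRow { uint64_t ptr; int64_t size; };

static int64_t liveBytes(const std::vector<SampleRow> & rows)
{
    std::unordered_map<uint64_t, int64_t> live;
    for (const auto & row : rows)
    {
        live[row.ptr] += row.size;
        if (live[row.ptr] == 0)
            live.erase(row.ptr); // allocation fully matched by its deallocation
    }
    int64_t total = 0;
    for (const auto & [ptr, size] : live)
        total += size;
    return total;
}

int main()
{
    // 0x1000 is allocated and freed; 0x2000 stays live.
    assert(liveBytes({{0x1000, 256}, {0x2000, 1024}, {0x1000, -256}}) == 1024);
}
```

Without the pointer, positive and negative samples could only be aggregated in bulk, not attributed to individual allocations.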
diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index 22150b9f656..5bad3e9bba2 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -80,7 +80,7 @@ void complete(const DB::FileSegmentsHolder & holder) { ASSERT_TRUE(file_segment->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(file_segment); - file_segment->completeWithState(DB::FileSegment::State::DOWNLOADED); + file_segment->completeWithoutState(); } } @@ -127,7 +127,7 @@ TEST(FileCache, get) assertRange(2, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADING); download(segments[0]); - segments[0]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[0]->completeWithoutState(); assertRange(3, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED); } @@ -148,7 +148,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments[1]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments[1]); - segments[1]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[1]->completeWithoutState(); assertRange(6, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED); } @@ -205,7 +205,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments[2]); - segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[2]->completeWithoutState(); assertRange(14, segments[3], DB::FileSegment::Range(17, 20), DB::FileSegment::State::DOWNLOADED); @@ -246,7 +246,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments[3]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments[3]); - segments[3]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[3]->completeWithoutState(); ASSERT_TRUE(segments[3]->state() == DB::FileSegment::State::DOWNLOADED); } @@ -269,8 +269,8 @@ TEST(FileCache, get) ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments[0]); prepareAndDownload(segments[2]); - segments[0]->completeWithState(DB::FileSegment::State::DOWNLOADED); - segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[0]->completeWithoutState(); + segments[2]->completeWithoutState(); } /// Current cache: [____][_] [][___][__] @@ -292,8 +292,8 @@ TEST(FileCache, get) ASSERT_TRUE(s1[0]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(s5[0]); prepareAndDownload(s1[0]); - s5[0]->completeWithState(DB::FileSegment::State::DOWNLOADED); - s1[0]->completeWithState(DB::FileSegment::State::DOWNLOADED); + s5[0]->completeWithoutState(); + s1[0]->completeWithoutState(); /// Current cache: [___] [_][___][_] [__] /// ^ ^ ^ ^ ^ ^ ^ ^ @@ -395,7 +395,7 @@ TEST(FileCache, get) } prepareAndDownload(segments[2]); - segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[2]->completeWithoutState(); ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADED); other_1.join(); @@ -460,7 +460,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments_2[1]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments_2[1]); - segments_2[1]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments_2[1]->completeWithoutState(); }); { diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp 
b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index 16df132b9d8..7583bf72457 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -12,16 +12,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -static FormatSettings updateFormatSettings(const FormatSettings & settings) -{ - if (settings.custom.escaping_rule != FormatSettings::EscapingRule::CSV || settings.custom.field_delimiter.empty()) - return settings; - - auto updated = settings; - updated.csv.delimiter = settings.custom.field_delimiter.front(); - return updated; -} - CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat( const Block & header_, ReadBuffer & in_buf_, @@ -31,7 +21,7 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat( bool ignore_spaces_, const FormatSettings & format_settings_) : CustomSeparatedRowInputFormat( - header_, std::make_unique(in_buf_), params_, with_names_, with_types_, ignore_spaces_, updateFormatSettings(format_settings_)) + header_, std::make_unique(in_buf_), params_, with_names_, with_types_, ignore_spaces_, format_settings_) { } @@ -171,15 +161,31 @@ bool CustomSeparatedFormatReader::checkEndOfRow() } template -String CustomSeparatedFormatReader::readFieldIntoString(bool is_first) +String CustomSeparatedFormatReader::readFieldIntoString(bool is_first, bool is_last, bool is_unknown) { if (!is_first) skipFieldDelimiter(); skipSpaces(); + updateFormatSettings(is_last); if constexpr (is_header) + { + /// If the number of columns is unknown and we use CSV escaping rule, + /// we don't know what delimiter to expect after the value, + /// so we should read until we meet field_delimiter or row_after_delimiter. + if (is_unknown && format_settings.custom.escaping_rule == FormatSettings::EscapingRule::CSV) + return readCSVStringWithTwoPossibleDelimiters( + *buf, format_settings.csv, format_settings.custom.field_delimiter, format_settings.custom.row_after_delimiter); + return readStringByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); + } else + { + if (is_unknown && format_settings.custom.escaping_rule == FormatSettings::EscapingRule::CSV) + return readCSVFieldWithTwoPossibleDelimiters( + *buf, format_settings.csv, format_settings.custom.field_delimiter, format_settings.custom.row_after_delimiter); + return readFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); + } } template @@ -192,14 +198,14 @@ std::vector CustomSeparatedFormatReader::readRowImpl() { do { - values.push_back(readFieldIntoString(values.empty())); + values.push_back(readFieldIntoString(values.empty(), false, true)); } while (!checkEndOfRow()); columns = values.size(); } else { for (size_t i = 0; i != columns; ++i) - values.push_back(readFieldIntoString(i == 0)); + values.push_back(readFieldIntoString(i == 0, i + 1 == columns, false)); } skipRowEndDelimiter(); @@ -223,9 +229,41 @@ void CustomSeparatedFormatReader::skipHeaderRow() skipRowEndDelimiter(); } -bool CustomSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool, const String &) +void CustomSeparatedFormatReader::updateFormatSettings(bool is_last_column) +{ + if (format_settings.custom.escaping_rule != FormatSettings::EscapingRule::CSV) + return; + + /// Clean custom delimiter from previous delimiter. + format_settings.csv.custom_delimiter.clear(); + + /// If delimiter has length = 1, it will be more efficient to use csv.delimiter. 
+    /// If we have some complex delimiter, normal CSV reading will not work properly if we use
+    /// just the first character of the delimiter (for example, if delimiter='||' and we have data 'abc|d||').
+    /// We have a special implementation for such cases that uses the custom delimiter; it's not as efficient,
+    /// but it works properly.
+
+    if (is_last_column)
+    {
+        /// If the field delimiter has length 1, it is more efficient to use csv.delimiter.
+        if (format_settings.custom.row_after_delimiter.size() == 1)
+            format_settings.csv.delimiter = format_settings.custom.row_after_delimiter.front();
+        else
+            format_settings.csv.custom_delimiter = format_settings.custom.row_after_delimiter;
+    }
+    else
+    {
+        if (format_settings.custom.field_delimiter.size() == 1)
+            format_settings.csv.delimiter = format_settings.custom.field_delimiter.front();
+        else
+            format_settings.csv.custom_delimiter = format_settings.custom.field_delimiter;
+    }
+}
+
+bool CustomSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String &)
{
    skipSpaces();
+    updateFormatSettings(is_last_file_column);
    return deserializeFieldByEscapingRule(type, serialization, column, *buf, format_settings.custom.escaping_rule, format_settings);
}

@@ -237,6 +275,8 @@ bool CustomSeparatedFormatReader::checkForSuffixImpl(bool check_eof)
        if (!check_eof)
            return false;

+        /// Allow optional \n before eof.
+        checkChar('\n', *buf);
        return buf->eof();
    }

@@ -246,6 +286,8 @@ bool CustomSeparatedFormatReader::checkForSuffixImpl(bool check_eof)
        if (!check_eof)
            return true;

+        /// Allow optional \n before eof.
+        checkChar('\n', *buf);
        if (buf->eof())
            return true;
    }
@@ -312,7 +354,7 @@ CustomSeparatedSchemaReader::CustomSeparatedSchemaReader(
        &reader, getDefaultDataTypeForEscapingRule(format_setting_.custom.escaping_rule))
    , buf(in_)
-    , reader(buf, ignore_spaces_, updateFormatSettings(format_setting_))
+    , reader(buf, ignore_spaces_, format_setting_)
{
}

diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
index e7e96ab87b1..625278631a5 100644
--- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
+++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
@@ -83,7 +83,9 @@ private:
    std::vector<String> readRowImpl();

    template <bool is_header>
-    String readFieldIntoString(bool is_first);
+    String readFieldIntoString(bool is_first, bool is_last, bool is_unknown);
+
+    void updateFormatSettings(bool is_last_column);

    PeekableReadBuffer * buf;
    bool ignore_spaces;
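The delimiter-selection rule that updateFormatSettings applies above boils down to: reset the custom delimiter between columns so a previous column's choice does not leak, take the fast csv.delimiter path for single-character delimiters, and fall back to the slower custom_delimiter path otherwise. A stripped-down sketch of just that decision (simplified standalone types, not the ClickHouse classes):

```cpp
#include <cassert>
#include <string>

// Simplified sketch of the delimiter-selection rule: a single-character
// delimiter uses the fast csv.delimiter path; anything longer must go
// through the slower custom_delimiter path to avoid mis-parsing data that
// contains the delimiter's first character.
struct CsvSettings
{
    char delimiter = ',';
    std::string custom_delimiter;
};

static void chooseDelimiter(CsvSettings & csv, const std::string & delimiter)
{
    csv.custom_delimiter.clear(); // reset whatever the previous column set
    if (delimiter.size() == 1)
        csv.delimiter = delimiter.front();
    else
        csv.custom_delimiter = delimiter;
}

int main()
{
    CsvSettings csv;
    chooseDelimiter(csv, ";");
    assert(csv.delimiter == ';' && csv.custom_delimiter.empty());
    chooseDelimiter(csv, "||"); // multi-char: must not be truncated to '|'
    assert(csv.custom_delimiter == "||");
}
```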
diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
index 76fd0d2a907..1532b16525f 100644
--- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
@@ -25,6 +25,27 @@ namespace ErrorCodes
        ErrorCodes::CANNOT_READ_ALL_DATA);
}

+static void updateFormatSettingsIfNeeded(FormatSettings::EscapingRule escaping_rule, FormatSettings & settings, const ParsedTemplateFormatString & row_format, char default_csv_delimiter, size_t file_column)
+{
+    if (escaping_rule != FormatSettings::EscapingRule::CSV)
+        return;
+
+    /// Clean custom_delimiter from the previous column.
+    settings.csv.custom_delimiter.clear();
+    /// If the field delimiter is empty, we read until the default CSV delimiter.
+    if (row_format.delimiters[file_column + 1].empty())
+        settings.csv.delimiter = default_csv_delimiter;
+    /// If the field delimiter has length 1, it is more efficient to use csv.delimiter.
+    else if (row_format.delimiters[file_column + 1].size() == 1)
+        settings.csv.delimiter = row_format.delimiters[file_column + 1].front();
+    /// If we have some complex delimiter, normal CSV reading will not work properly if we use
+    /// just the first character of the delimiter (for example, if delimiter='||' and we have data 'abc|d||').
+    /// We have a special implementation for such cases that uses the custom delimiter; it's not as efficient,
+    /// but it works properly.
+    else
+        settings.csv.custom_delimiter = row_format.delimiters[file_column + 1];
+}
+
TemplateRowInputFormat::TemplateRowInputFormat(
    const Block & header_,
    ReadBuffer & in_,
@@ -129,10 +150,8 @@ bool TemplateRowInputFormat::deserializeField(const DataTypePtr & type,
    const SerializationPtr & serialization, IColumn & column, size_t file_column)
{
    EscapingRule escaping_rule = row_format.escaping_rules[file_column];
-    if (escaping_rule == EscapingRule::CSV)
-        /// Will read unquoted string until settings.csv.delimiter
-        settings.csv.delimiter = row_format.delimiters[file_column + 1].empty() ? default_csv_delimiter :
-                                                                                  row_format.delimiters[file_column + 1].front();
+    updateFormatSettingsIfNeeded(escaping_rule, settings, row_format, default_csv_delimiter, file_column);
+
    try
    {
        return deserializeFieldByEscapingRule(type, serialization, column, *buf, escaping_rule, settings);
@@ -466,6 +485,7 @@ TemplateSchemaReader::TemplateSchemaReader(
    , format(format_)
    , row_format(row_format_)
    , format_reader(buf, ignore_spaces_, format, row_format, row_between_delimiter, format_settings)
+    , default_csv_delimiter(format_settings_.csv.delimiter)
{
    setColumnNames(row_format.column_names);
}
@@ -489,9 +509,7 @@ DataTypes TemplateSchemaReader::readRowAndGetDataTypes()
    for (size_t i = 0; i != row_format.columnsCount(); ++i)
    {
        format_reader.skipDelimiter(i);
-        if (row_format.escaping_rules[i] == FormatSettings::EscapingRule::CSV)
-            format_settings.csv.delimiter = row_format.delimiters[i + 1].empty() ?
format_settings.csv.delimiter : row_format.delimiters[i + 1].front(); - + updateFormatSettingsIfNeeded(row_format.escaping_rules[i], format_settings, row_format, default_csv_delimiter, i); field = readFieldByEscapingRule(buf, row_format.escaping_rules[i], format_settings); data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i])); } diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h index 740683ad95d..cf12eb8d136 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -128,6 +128,7 @@ private: const ParsedTemplateFormatString row_format; TemplateFormatReader format_reader; bool first_row = true; + const char default_csv_delimiter; }; bool parseDelimiterWithDiagnosticInfo(WriteBuffer & out, ReadBuffer & buf, const String & delimiter, const String & description, bool skip_spaces); diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index e7dda957b04..b3066f0bdbb 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -111,7 +111,7 @@ public: protected: ReadBuffer * in; - const FormatSettings format_settings; + FormatSettings format_settings; }; /// Base class for schema inference for formats with -WithNames and -WithNamesAndTypes suffixes. diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index e6c6f02b098..963e874b2a3 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -46,6 +46,17 @@ class MarkCache; class UncompressedCache; class MergeTreeTransaction; + +enum class DataPartRemovalState +{ + NOT_ATTEMPTED, + VISIBLE_TO_TRANSACTIONS, + NON_UNIQUE_OWNERSHIP, + NOT_REACHED_REMOVAL_TIME, + HAS_SKIPPED_MUTATION_PARENT, + REMOVED, +}; + /// Description of the data part. class IMergeTreeDataPart : public std::enable_shared_from_this, public DataPartStorageHolder { @@ -446,6 +457,10 @@ public: void removeDeleteOnDestroyMarker(); void removeVersionMetadata(); + mutable std::atomic removal_state = DataPartRemovalState::NOT_ATTEMPTED; + + mutable std::atomic last_removal_attemp_time = 0; + protected: /// Total size of all columns, calculated once in calcuateColumnSizesOnDisk diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index 76d69cc6b7d..a833da7064f 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -88,6 +88,10 @@ MergeListElement::MergeListElement( /// thread_group::memory_tracker, but MemoryTrackerThreadSwitcher will reset parent). memory_tracker.setProfilerStep(settings.memory_profiler_step); memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + /// Specify sample probability also for current thread to track more deallocations. 
+ if (auto * thread_memory_tracker = DB::CurrentThread::getMemoryTracker()) + thread_memory_tracker->setSampleProbability(settings.memory_profiler_sample_probability); + memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); if (settings.memory_tracker_fault_probability > 0.0) memory_tracker.setFaultProbability(settings.memory_tracker_fault_probability); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 3841295b875..e60781efa9c 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -84,6 +84,7 @@ #include #include +#include #include #include #include @@ -1762,9 +1763,12 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) { const DataPartPtr & part = *it; + part->last_removal_attemp_time.store(time_now, std::memory_order_relaxed); + /// Do not remove outdated part if it may be visible for some transaction if (!part->version.canBeRemoved()) { + part->removal_state.store(DataPartRemovalState::VISIBLE_TO_TRANSACTIONS, std::memory_order_relaxed); skipped_parts.push_back(part->info); continue; } @@ -1772,20 +1776,27 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) /// Grab only parts that are not used by anyone (SELECTs for example). if (!part.unique()) { + part->removal_state.store(DataPartRemovalState::NON_UNIQUE_OWNERSHIP, std::memory_order_relaxed); skipped_parts.push_back(part->info); continue; } auto part_remove_time = part->remove_time.load(std::memory_order_relaxed); - if ((part_remove_time < time_now && time_now - part_remove_time > getSettings()->old_parts_lifetime.totalSeconds() && !has_skipped_mutation_parent(part)) + bool reached_removal_time = part_remove_time < time_now && time_now - part_remove_time > getSettings()->old_parts_lifetime.totalSeconds(); + if ((reached_removal_time && !has_skipped_mutation_parent(part)) || force || isInMemoryPart(part) /// Remove in-memory parts immediately to not store excessive data in RAM || (part->version.creation_csn == Tx::RolledBackCSN && getSettings()->remove_rolled_back_parts_immediately)) { + part->removal_state.store(DataPartRemovalState::REMOVED, std::memory_order_relaxed); parts_to_delete.emplace_back(it); } else { + if (!reached_removal_time) + part->removal_state.store(DataPartRemovalState::NOT_REACHED_REMOVAL_TIME, std::memory_order_relaxed); + else + part->removal_state.store(DataPartRemovalState::HAS_SKIPPED_MUTATION_PARENT, std::memory_order_relaxed); skipped_parts.push_back(part->info); continue; } diff --git a/src/Storages/NamedCollectionConfiguration.cpp b/src/Storages/NamedCollections/NamedCollectionConfiguration.cpp similarity index 88% rename from src/Storages/NamedCollectionConfiguration.cpp rename to src/Storages/NamedCollections/NamedCollectionConfiguration.cpp index b0e7bdce32a..6875458958b 100644 --- a/src/Storages/NamedCollectionConfiguration.cpp +++ b/src/Storages/NamedCollections/NamedCollectionConfiguration.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -35,19 +35,30 @@ template T getConfigValueOrDefault( return *default_value; } - if constexpr (std::is_same_v) - return config.getString(path); - else if constexpr (std::is_same_v) - return config.getUInt64(path); - else if constexpr (std::is_same_v) - return config.getInt64(path); - else if constexpr (std::is_same_v) - return config.getDouble(path); - else + try + { + if constexpr (std::is_same_v) + return config.getString(path); + else if constexpr (std::is_same_v) + return 
config.getUInt64(path); + else if constexpr (std::is_same_v) + return config.getInt64(path); + else if constexpr (std::is_same_v) + return config.getDouble(path); + else + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Unsupported type in getConfigValueOrDefault(). " + "Supported types are String, UInt64, Int64, Float64"); + } + catch (const Poco::SyntaxException &) + { throw Exception( - ErrorCodes::NOT_IMPLEMENTED, - "Unsupported type in getConfigValueOrDefault(). " - "Supported types are String, UInt64, Int64, Float64"); + ErrorCodes::BAD_ARGUMENTS, + "Cannot extract {} from {}", + toString(magic_enum::enum_name(Field::TypeToEnum>::value)), + path); + } } template void setConfigValue( diff --git a/src/Storages/NamedCollectionConfiguration.h b/src/Storages/NamedCollections/NamedCollectionConfiguration.h similarity index 100% rename from src/Storages/NamedCollectionConfiguration.h rename to src/Storages/NamedCollections/NamedCollectionConfiguration.h diff --git a/src/Storages/NamedCollectionUtils.cpp b/src/Storages/NamedCollections/NamedCollectionUtils.cpp similarity index 97% rename from src/Storages/NamedCollectionUtils.cpp rename to src/Storages/NamedCollections/NamedCollectionUtils.cpp index 75d5aace664..c4caa5c95f6 100644 --- a/src/Storages/NamedCollectionUtils.cpp +++ b/src/Storages/NamedCollections/NamedCollectionUtils.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -13,8 +13,8 @@ #include #include #include -#include -#include +#include +#include namespace fs = std::filesystem; @@ -78,7 +78,7 @@ public: /// (`enumerate_result` == ). const bool collection_is_empty = enumerate_result.size() == 1 && *enumerate_result.begin() == collection_prefix; - std::set keys; + std::set> keys; if (!collection_is_empty) { /// Skip collection prefix and add +1 to avoid '.' in the beginning. 
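The change to the `keys` set in this hunk adds a second template argument, presumably the transparent comparator std::less<>. That enables heterogeneous lookup: keys can then be probed with a std::string_view or a C string without materializing a temporary std::string for every query. A minimal sketch, assuming std::less<> is indeed the comparator:

```cpp
#include <cassert>
#include <set>
#include <string>
#include <string_view>

int main()
{
    // A transparent comparator (std::less<>) allows find()/contains() to
    // accept any type comparable with the key, not only std::string itself.
    std::set<std::string, std::less<>> keys = {"url", "format"};

    std::string_view probe = "format";
    assert(keys.contains(probe)); // heterogeneous lookup, no temporary std::string
}
```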
@@ -296,7 +296,7 @@ private: const auto config = NamedCollectionConfiguration::createConfiguration( collection_name, query.changes); - std::set keys; + std::set> keys; for (const auto & [name, _] : query.changes) keys.insert(name); diff --git a/src/Storages/NamedCollectionUtils.h b/src/Storages/NamedCollections/NamedCollectionUtils.h similarity index 100% rename from src/Storages/NamedCollectionUtils.h rename to src/Storages/NamedCollections/NamedCollectionUtils.h diff --git a/src/Storages/NamedCollections.cpp b/src/Storages/NamedCollections/NamedCollections.cpp similarity index 92% rename from src/Storages/NamedCollections.cpp rename to src/Storages/NamedCollections/NamedCollections.cpp index d90225547ac..03633bbd370 100644 --- a/src/Storages/NamedCollections.cpp +++ b/src/Storages/NamedCollections/NamedCollections.cpp @@ -3,8 +3,8 @@ #include #include #include -#include -#include +#include +#include #include #include @@ -234,6 +234,16 @@ public: return keys; } + Keys::const_iterator begin() const + { + return keys.begin(); + } + + Keys::const_iterator end() const + { + return keys.end(); + } + std::string dumpStructure() const { /// Convert a collection config like @@ -375,6 +385,22 @@ NamedCollection::Keys NamedCollection::getKeys() const return pimpl->getKeys(); } +template NamedCollection::const_iterator NamedCollection::begin() const +{ + std::unique_lock lock(mutex, std::defer_lock); + if constexpr (!Locked) + lock.lock(); + return pimpl->begin(); +} + +template NamedCollection::const_iterator NamedCollection::end() const +{ + std::unique_lock lock(mutex, std::defer_lock); + if constexpr (!Locked) + lock.lock(); + return pimpl->end(); +} + std::string NamedCollection::dumpStructure() const { std::lock_guard lock(mutex); @@ -417,4 +443,8 @@ template void NamedCollection::setOrUpdate(const NamedCollection template void NamedCollection::remove(const Key & key); template void NamedCollection::remove(const Key & key); +template NamedCollection::const_iterator NamedCollection::begin() const; +template NamedCollection::const_iterator NamedCollection::begin() const; +template NamedCollection::const_iterator NamedCollection::end() const; +template NamedCollection::const_iterator NamedCollection::end() const; } diff --git a/src/Storages/NamedCollections.h b/src/Storages/NamedCollections/NamedCollections.h similarity index 89% rename from src/Storages/NamedCollections.h rename to src/Storages/NamedCollections/NamedCollections.h index f7181c2b539..53b7a828a63 100644 --- a/src/Storages/NamedCollections.h +++ b/src/Storages/NamedCollections/NamedCollections.h @@ -1,7 +1,7 @@ #pragma once #include -#include -#include +#include +#include namespace Poco { namespace Util { class AbstractConfiguration; } } @@ -22,7 +22,7 @@ class NamedCollection { public: using Key = std::string; - using Keys = std::set; + using Keys = std::set>; using SourceId = NamedCollectionUtils::SourceId; static MutableNamedCollectionPtr create( @@ -49,6 +49,13 @@ public: Keys getKeys() const; + using iterator = typename Keys::iterator; + using const_iterator = typename Keys::const_iterator; + + template const_iterator begin() const; + + template const_iterator end() const; + std::string dumpStructure() const; bool isMutable() const { return is_mutable; } diff --git a/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp new file mode 100644 index 00000000000..cceabdfd7bf --- /dev/null +++ b/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp @@ -0,0 +1,112 @@ 
+#include "NamedCollectionsHelpers.h" +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + NamedCollectionPtr tryGetNamedCollectionFromASTs(ASTs asts) + { + if (asts.empty()) + return nullptr; + + const auto * identifier = asts[0]->as(); + if (!identifier) + return nullptr; + + const auto & collection_name = identifier->name(); + return NamedCollectionFactory::instance().tryGet(collection_name); + } + + std::optional> getKeyValueFromAST(ASTPtr ast) + { + const auto * function = ast->as(); + if (!function || function->name != "equals") + return std::nullopt; + + const auto * function_args_expr = assert_cast(function->arguments.get()); + const auto & function_args = function_args_expr->children; + + if (function_args.size() != 2) + return std::nullopt; + + auto literal_key = evaluateConstantExpressionOrIdentifierAsLiteral( + function_args[0], Context::getGlobalContextInstance()); + auto key = checkAndGetLiteralArgument(literal_key, "key"); + + auto literal_value = evaluateConstantExpressionOrIdentifierAsLiteral( + function_args[1], Context::getGlobalContextInstance()); + auto value = literal_value->as()->value; + + return std::pair{key, value}; + } +} + + +NamedCollectionPtr tryGetNamedCollectionWithOverrides(ASTs asts) +{ + if (asts.empty()) + return nullptr; + + auto collection = tryGetNamedCollectionFromASTs(asts); + if (!collection) + return nullptr; + + if (asts.size() == 1) + return collection; + + auto collection_copy = collection->duplicate(); + + for (const auto & ast : asts) + { + auto value_override = getKeyValueFromAST(ast); + if (!value_override) + continue; + + const auto & [key, value] = *value_override; + collection_copy->setOrUpdate(key, toString(value)); + } + + return collection_copy; +} + +void validateNamedCollection( + const NamedCollection & collection, + const std::unordered_set & required_keys, + const std::unordered_set & optional_keys) +{ + const auto & keys = collection.getKeys(); + for (const auto & key : keys) + { + if (!required_keys.contains(key) && !optional_keys.contains(key)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Unexpected key `{}` in named collection. Required keys: {}, optional keys: {}", + key, fmt::join(required_keys, ", "), fmt::join(optional_keys, ", ")); + } + } + + for (const auto & key : required_keys) + { + if (!keys.contains(key)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Key `{}` is required, but not specified. 
Required keys: {}, optional keys: {}", + key, fmt::join(required_keys, ", "), fmt::join(optional_keys, ", ")); + } + } +} + +} diff --git a/src/Storages/NamedCollections/NamedCollectionsHelpers.h b/src/Storages/NamedCollections/NamedCollectionsHelpers.h new file mode 100644 index 00000000000..39baafa9039 --- /dev/null +++ b/src/Storages/NamedCollections/NamedCollectionsHelpers.h @@ -0,0 +1,18 @@ +#pragma once +#include +#include +#include +#include + + +namespace DB +{ + +NamedCollectionPtr tryGetNamedCollectionWithOverrides(ASTs asts); + +void validateNamedCollection( + const NamedCollection & collection, + const std::unordered_set & required_keys, + const std::unordered_set & optional_keys); + +} diff --git a/src/Storages/NamedCollections_fwd.h b/src/Storages/NamedCollections/NamedCollections_fwd.h similarity index 100% rename from src/Storages/NamedCollections_fwd.h rename to src/Storages/NamedCollections/NamedCollections_fwd.h diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index afd7cf180a9..61437f07c65 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include @@ -64,8 +66,6 @@ namespace fs = std::filesystem; -static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - namespace ProfileEvents { extern const Event S3DeleteObjects; @@ -75,6 +75,28 @@ namespace ProfileEvents namespace DB { +static const String PARTITION_ID_WILDCARD = "{_partition_id}"; + +static const std::unordered_set required_configuration_keys = { + "url", +}; +static std::unordered_set optional_configuration_keys = { + "format", + "compression", + "compression_method", + "structure", + "access_key_id", + "secret_access_key", + "filename", + "use_environment_credentials", + "max_single_read_retries", + "min_upload_part_size", + "upload_part_size_multiply_factor", + "upload_part_size_multiply_parts_count_threshold", + "max_single_part_upload_size", + "max_connections", +}; + namespace ErrorCodes { extern const int CANNOT_PARSE_TEXT; @@ -1175,48 +1197,60 @@ void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration upd.auth_settings.use_insecure_imds_request.value_or(ctx->getConfigRef().getBool("s3.use_insecure_imds_request", false))); } - -void StorageS3::processNamedCollectionResult(StorageS3Configuration & configuration, const std::vector> & key_value_args) +void StorageS3::processNamedCollectionResult(StorageS3Configuration & configuration, const NamedCollection & collection) { - for (const auto & [arg_name, arg_value] : key_value_args) - { - if (arg_name == "access_key_id") - configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(arg_value, "access_key_id"); - else if (arg_name == "secret_access_key") - configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(arg_value, "secret_access_key"); - else if (arg_name == "filename") - configuration.url = std::filesystem::path(configuration.url) / checkAndGetLiteralArgument(arg_value, "filename"); - else if (arg_name == "use_environment_credentials") - configuration.auth_settings.use_environment_credentials = checkAndGetLiteralArgument(arg_value, "use_environment_credentials"); - else if (arg_name == "max_single_read_retries") - configuration.request_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_single_read_retries"); - else if (arg_name == "min_upload_part_size") - configuration.request_settings.min_upload_part_size = checkAndGetLiteralArgument(arg_value, 
"min_upload_part_size"); - else if (arg_name == "upload_part_size_multiply_factor") - configuration.request_settings.upload_part_size_multiply_factor = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_factor"); - else if (arg_name == "upload_part_size_multiply_parts_count_threshold") - configuration.request_settings.upload_part_size_multiply_parts_count_threshold = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_parts_count_threshold"); - else if (arg_name == "max_single_part_upload_size") - configuration.request_settings.max_single_part_upload_size = checkAndGetLiteralArgument(arg_value, "max_single_part_upload_size"); - else if (arg_name == "max_connections") - configuration.request_settings.max_connections = checkAndGetLiteralArgument(arg_value, "max_connections"); - else - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Unknown key-value argument `{}` for StorageS3, expected: url, [access_key_id, secret_access_key], name of used format and [compression_method].", - arg_name); - } -} + validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + std::string filename; + for (const auto & key : collection) + { + if (key == "url") + configuration.url = collection.get(key); + else if (key == "access_key_id") + configuration.auth_settings.access_key_id = collection.get(key); + else if (key == "secret_access_key") + configuration.auth_settings.secret_access_key = collection.get(key); + else if (key == "filename") + filename = collection.get(key); + else if (key == "format") + configuration.format = collection.get(key); + else if (key == "compression" || key == "compression_method") + configuration.compression_method = collection.get(key); + else if (key == "structure") + configuration.structure = collection.get(key); + else if (key == "use_environment_credentials") + configuration.auth_settings.use_environment_credentials = collection.get(key); + else if (key == "max_single_read_retries") + configuration.request_settings.max_single_read_retries = collection.get(key); + else if (key == "min_upload_part_size") + configuration.request_settings.min_upload_part_size = collection.get(key); + else if (key == "upload_part_size_multiply_factor") + configuration.request_settings.upload_part_size_multiply_factor = collection.get(key); + else if (key == "upload_part_size_multiply_parts_count_threshold") + configuration.request_settings.upload_part_size_multiply_parts_count_threshold = collection.get(key); + else if (key == "max_single_part_upload_size") + configuration.request_settings.max_single_part_upload_size = collection.get(key); + else if (key == "max_connections") + configuration.request_settings.max_connections = collection.get(key); + else + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Unknown configuration key `{}` for StorageS3, " + "expected: url, [access_key_id, secret_access_key], " + "name of used format and [compression_method].", + key); + } + if (!filename.empty()) + configuration.url = std::filesystem::path(configuration.url) / filename; +} StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPtr local_context) { StorageS3Configuration configuration; - if (auto named_collection = getURLBasedDataSourceConfiguration(engine_args, local_context)) + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args)) { - auto [common_configuration, storage_specific_args] = named_collection.value(); - configuration.set(common_configuration); - 
processNamedCollectionResult(configuration, storage_specific_args); + processNamedCollectionResult(configuration, *named_collection); } else { diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index fa58ff9809e..5bf714d5602 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -30,6 +30,8 @@ namespace DB class PullingPipelineExecutor; class StorageS3SequentialSource; +class NamedCollection; + class StorageS3Source : public ISource, WithContext { public: @@ -216,7 +218,7 @@ public: ContextPtr ctx, std::unordered_map * object_infos = nullptr); - static void processNamedCollectionResult(StorageS3Configuration & configuration, const std::vector> & key_value_args); + static void processNamedCollectionResult(StorageS3Configuration & configuration, const NamedCollection & collection); struct S3Configuration { diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index f5b6829c7ef..f69f9f8ee7f 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -94,6 +94,7 @@ const char * auto_contributors[] { "Aliaksandr Shylau", "Alina Terekhova", "Amesaru", + "Amila Welihinda", "Amir Vaza", "Amos Bird", "Amr Alaa", @@ -174,6 +175,7 @@ const char * auto_contributors[] { "Avogar", "Azat Khuzhin", "BSD_Conqueror", + "BSWaterB", "Babacar Diassé", "Bakhtiyor Ruziev", "BanyRule", @@ -186,6 +188,7 @@ const char * auto_contributors[] { "Bharat Nallan", "Bharat Nallan Chakravarthy", "Big Elephant", + "BigRedEye", "Bill", "BiteTheDDDDt", "BlahGeek", @@ -203,6 +206,7 @@ const char * auto_contributors[] { "Brett Hoerner", "Brian Hunter", "Bulat Gaifullin", + "Camden Cheek", "Camilo Sierra", "Carbyn", "Carlos Rodríguez Hernández", @@ -291,6 +295,7 @@ const char * auto_contributors[] { "Eldar Zaitov", "Elena", "Elena Baskakova", + "Elena Torró", "Elghazal Ahmed", "Elizaveta Mironyuk", "Elykov Alexandr", @@ -525,6 +530,7 @@ const char * auto_contributors[] { "Maksim Kita", "Mallik Hassan", "Malte", + "Manuel de la Peña", "Marat IDRISOV", "Marcelo Rodriguez", "Marek Vavrusa", @@ -534,6 +540,7 @@ const char * auto_contributors[] { "Mark Andreev", "Mark Frost", "Mark Papadakis", + "Mark Polokhov", "Maroun Maroun", "Marquitos", "Marsel Arduanov", @@ -709,6 +716,7 @@ const char * auto_contributors[] { "Quanfa Fu", "Quid37", "Radistka-75", + "Raevsky Rudolf", "Rafael Acevedo", "Rafael David Tinoco", "Rajkumar", @@ -779,6 +787,7 @@ const char * auto_contributors[] { "Sergey Mirvoda", "Sergey Ryzhkov", "Sergey Shtykov", + "Sergey Skvortsov", "Sergey Tulentsev", "Sergey V. 
Galtsev", "Sergey Zaikin", @@ -790,6 +799,7 @@ const char * auto_contributors[] { "Sherry Wang", "Shoh Jahon", "SiderZhang", + "Sidorov Pavel", "Silviu Caragea", "Simeon Emanuilov", "Simon Liu", @@ -878,6 +888,7 @@ const char * auto_contributors[] { "Viktor Taranenko", "Vincent Bernat", "Vitalii S", + "Vitaliy", "Vitaliy Fedorchenko", "Vitaliy Karnienko", "Vitaliy Kozlovskiy", @@ -922,6 +933,7 @@ const char * auto_contributors[] { "Weiqing Xu", "William Shallum", "Winter Zhang", + "Xbitz29", "XenoAmess", "Xianda Ke", "Xiang Zhou", @@ -1013,6 +1025,7 @@ const char * auto_contributors[] { "benbiti", "bgranvea", "bharatnc", + "bit-ranger", "bkuschel", "blazerer", "bluebirddm", @@ -1238,6 +1251,7 @@ const char * auto_contributors[] { "luc1ph3r", "lulichao", "luocongkai", + "lzydmxy", "m-ves", "madianjun", "maiha", @@ -1313,6 +1327,7 @@ const char * auto_contributors[] { "peter279k", "philip.han", "pingyu", + "pkubaj", "potya", "presto53", "proller", @@ -1378,6 +1393,7 @@ const char * auto_contributors[] { "taiyang-li", "tangjiangling", "tao jiang", + "taofengliu", "taojiatao", "tavplubix", "tchepavel", @@ -1394,6 +1410,7 @@ const char * auto_contributors[] { "turbo jason", "tyrionhuang", "ubuntu", + "unbyte", "unegare", "unknown", "urgordeadbeef", @@ -1481,6 +1498,7 @@ const char * auto_contributors[] { "Дмитрий Канатников", "Иванов Евгений", "Илья Исаев", + "Коренберг ☢️ Марк", "Павел Литвиненко", "Смитюх Вячеслав", "Сундуков Алексей", diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 2353be9b69f..432d2c4ac64 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -46,7 +46,7 @@ static String getEngineFull(const DatabasePtr & database) break; /// Database was dropped - if (!locked_database && name == database->getDatabaseName()) + if (name == database->getDatabaseName()) return {}; guard.reset(); diff --git a/src/Storages/System/StorageSystemNamedCollections.cpp b/src/Storages/System/StorageSystemNamedCollections.cpp index 6f4078369d2..16c259796e6 100644 --- a/src/Storages/System/StorageSystemNamedCollections.cpp +++ b/src/Storages/System/StorageSystemNamedCollections.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 0be44219c7d..b205b7c224d 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -1,4 +1,7 @@ #include "StorageSystemParts.h" +#include +#include +#include #include #include @@ -15,6 +18,29 @@ #include #include +namespace +{ +std::string_view getRemovalStateDescription(DB::DataPartRemovalState state) +{ + switch (state) + { + case DB::DataPartRemovalState::NOT_ATTEMPTED: + return "Cleanup thread hasn't seen this part yet"; + case DB::DataPartRemovalState::VISIBLE_TO_TRANSACTIONS: + return "Part maybe visible for transactions"; + case DB::DataPartRemovalState::NON_UNIQUE_OWNERSHIP: + return "Part ownership is not unique"; + case DB::DataPartRemovalState::NOT_REACHED_REMOVAL_TIME: + return "Part hasn't reached removal time yet"; + case DB::DataPartRemovalState::HAS_SKIPPED_MUTATION_PARENT: + return "Waiting mutation parent to be removed"; + case DB::DataPartRemovalState::REMOVED: + return "Part was selected to be removed"; + } +} + +} + namespace DB { @@ -92,6 +118,9 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"removal_csn", std::make_shared()}, 
{"has_lightweight_delete", std::make_shared()}, + + {"last_removal_attemp_time", std::make_shared()}, + {"removal_state", std::make_shared()}, } ) { @@ -310,6 +339,10 @@ void StorageSystemParts::processNextStorage( columns[res_index++]->insert(part->version.removal_csn.load(std::memory_order_relaxed)); if (columns_mask[src_index++]) columns[res_index++]->insert(part->hasLightweightDelete()); + if (columns_mask[src_index++]) + columns[res_index++]->insert(static_cast(part->last_removal_attemp_time.load(std::memory_order_relaxed))); + if (columns_mask[src_index++]) + columns[res_index++]->insert(getRemovalStateDescription(part->removal_state.load(std::memory_order_relaxed))); /// _state column should be the latest. /// Do not use part->getState*, it can be changed from different thread diff --git a/src/Storages/tests/gtest_named_collections.cpp b/src/Storages/tests/gtest_named_collections.cpp index 369e8ec44f6..d5fe5010991 100644 --- a/src/Storages/tests/gtest_named_collections.cpp +++ b/src/Storages/tests/gtest_named_collections.cpp @@ -1,6 +1,6 @@ #include -#include -#include +#include +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 3b68a0766aa..23822486c29 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "registerTableFunctions.h" #include @@ -30,11 +31,9 @@ namespace ErrorCodes /// This is needed to avoid copy-pase. Because s3Cluster arguments only differ in additional argument (first) - cluster name void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & args, ContextPtr context, StorageS3Configuration & s3_configuration) { - if (auto named_collection = getURLBasedDataSourceConfiguration(args, context)) + if (auto named_collection = tryGetNamedCollectionWithOverrides(args)) { - auto [common_configuration, storage_specific_args] = named_collection.value(); - s3_configuration.set(common_configuration); - StorageS3::processNamedCollectionResult(s3_configuration, storage_specific_args); + StorageS3::processNamedCollectionResult(s3_configuration, *named_collection); } else { diff --git a/tests/ci/release.py b/tests/ci/release.py index 502efd79173..57d5c4cdd6e 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -32,8 +32,6 @@ from version_helper import ( RELEASE_READY_STATUS = "Ready for release" -git = Git() - class Repo: VALID = ("ssh", "https", "origin") @@ -79,7 +77,7 @@ class Release: self.release_commit = release_commit assert release_type in self.BIG + self.SMALL self.release_type = release_type - self._git = git + self._git = Git() self._version = get_version_from_repo(git=self._git) self._release_branch = "" self._rollback_stack = [] # type: List[str] diff --git a/tests/queries/0_stateless/01195_formats_diagnostic_info.reference b/tests/queries/0_stateless/01195_formats_diagnostic_info.reference index 139926db671..0d4705dc9bf 100644 --- a/tests/queries/0_stateless/01195_formats_diagnostic_info.reference +++ b/tests/queries/0_stateless/01195_formats_diagnostic_info.reference @@ -27,7 +27,6 @@ Column 2, name: d, type: Decimal(18, 10), parsed text: "123456789"ERROR ERROR: There is no delimiter between fields: expected "", got "7Hello123" ERROR: There is no delimiter after last field: expected "", got "1" ERROR: There is no delimiter after last field: expected "", got "Hello" -Column 0, name: t, type: DateTime, ERROR: text "" is not like DateTime 
JSONCompactEachRow Column 2, name: d, type: Decimal(18, 10), parsed text: "123456789"ERROR Column 0, name: t, type: DateTime, parsed text: "2020-04-21 12:34:56"ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format. diff --git a/tests/queries/0_stateless/01195_formats_diagnostic_info.sh b/tests/queries/0_stateless/01195_formats_diagnostic_info.sh index dde410d95c4..a6b3ebf4087 100755 --- a/tests/queries/0_stateless/01195_formats_diagnostic_info.sh +++ b/tests/queries/0_stateless/01195_formats_diagnostic_info.sh @@ -37,7 +37,6 @@ echo -e '2020-04-21 12:34:56\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ERR echo -e '2020-04-21 12:34:567\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:56\tHello\t12345678\t1' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:56\t\t123Hello' | "${PARSER[@]}" 2>&1| grep "ERROR" -echo -e '2020-04-21 12:34:56\tHello\t12345678\n' | "${PARSER[@]}" 2>&1| grep "ERROR" PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format JSONCompactEachRow) echo '["2020-04-21 12:34:56", "Hello", 12345678]' | "${PARSER[@]}" 2>&1| grep "ERROR" || echo "JSONCompactEachRow" diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 17102b47c1a..c866f3e7b52 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -504,6 +504,8 @@ CREATE TABLE system.parts `creation_csn` UInt64, `removal_csn` UInt64, `has_lightweight_delete` UInt8, + `last_removal_attempt_time` DateTime, + `removal_state` String, `bytes` UInt64, `marks_size` UInt64 ) diff --git a/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.reference b/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.reference new file mode 100644 index 00000000000..5bad96d521c --- /dev/null +++ b/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.reference @@ -0,0 +1,7 @@ +1 2 +3 4 +1 2 +3 4 +ab|c de&f +ab|c de*f gh&k +|av *ad &ad diff --git a/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.sh b/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.sh new file mode 100755 index 00000000000..ec986a4d16e --- /dev/null +++ b/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +echo "1||2&&3||4&&" | $CLICKHOUSE_LOCAL --input-format=CustomSeparated --format_custom_field_delimiter='||' --format_custom_row_after_delimiter='&&' --format_custom_escaping_rule='CSV' -q "select * from table" + +echo "1||2|||3||4|||" | $CLICKHOUSE_LOCAL --input-format=CustomSeparated --format_custom_field_delimiter='||' --format_custom_row_after_delimiter='|||' --format_custom_escaping_rule='CSV' -q "select * from table" + +echo "ab|c||de&f&&" | $CLICKHOUSE_LOCAL --input-format=CustomSeparated --format_custom_field_delimiter='||' --format_custom_row_after_delimiter='&&' --format_custom_escaping_rule='CSV' -q "select * from table" + +echo -e "\${column_1:CSV}||\${column_2:CSV}**\${column_3:CSV}&&" > row_format_02481 + +echo -e "ab|c||de*f**gh&k&&\n|av||*ad**&ad&&" | $CLICKHOUSE_LOCAL -q "select * from table" --input-format=Template --format_template_row='row_format_02481' --format_template_rows_between_delimiter "" + +rm row_format_02481 + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 23aa0d1dbaf..025993f9039 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v22.12.1.1752-stable 2022-12-15 v22.11.2.30-stable 2022-12-02 v22.11.1.1360-stable 2022-11-17 v22.10.4.23-stable 2022-12-02