Merge branch 'master' into fix-comp-setting

This commit is contained in:
Kruglov Pavel 2022-12-16 19:10:53 +01:00 committed by GitHub
commit 33583e10a6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
158 changed files with 3367 additions and 796 deletions

View File

@ -61,7 +61,6 @@
* `SET param_x` query no longer requires manual string serialization for the value of the parameter. For example, query `SET param_a = '[\'a\', \'b\']'` can now be written like `SET param_a = ['a', 'b']`. [#41874](https://github.com/ClickHouse/ClickHouse/pull/41874) ([Nikolay Degterinsky](https://github.com/evillique)).
* Show read rows in the progress indication while reading from STDIN from client. Closes [#43423](https://github.com/ClickHouse/ClickHouse/issues/43423). [#43442](https://github.com/ClickHouse/ClickHouse/pull/43442) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Show progress bar while reading from s3 table function / engine. [#43454](https://github.com/ClickHouse/ClickHouse/pull/43454) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Progress bar will show both read and written rows. [#43496](https://github.com/ClickHouse/ClickHouse/pull/43496) ([Ilya Yatsishin](https://github.com/qoega)).
* `filesystemAvailable` and related functions support one optional argument with disk name, and change `filesystemFree` to `filesystemUnreserved`. Closes [#35076](https://github.com/ClickHouse/ClickHouse/issues/35076). [#42064](https://github.com/ClickHouse/ClickHouse/pull/42064) ([flynn](https://github.com/ucasfl)).
* Integration with LDAP: increased the default value of search_limit to 256, and added LDAP server config option to change that to an arbitrary value. Closes: [#42276](https://github.com/ClickHouse/ClickHouse/issues/42276). [#42461](https://github.com/ClickHouse/ClickHouse/pull/42461) ([Vasily Nemkov](https://github.com/Enmk)).
* Allow the removal of sensitive information (see the `query_masking_rules` in the configuration file) from the exception messages as well. Resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#42940](https://github.com/ClickHouse/ClickHouse/pull/42940) ([filimonov](https://github.com/filimonov)).

View File

@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s
| Version | Supported |
|:-|:-|
| 22.12 | ✔️ |
| 22.11 | ✔️ |
| 22.10 | ✔️ |
| 22.9 | ✔️ |
| 22.9 | |
| 22.8 | ✔️ |
| 22.7 | ❌ |
| 22.6 | ❌ |

View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54469)
SET(VERSION_REVISION 54470)
SET(VERSION_MAJOR 22)
SET(VERSION_MINOR 12)
SET(VERSION_MINOR 13)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 0d211ed19849fe44b0e43fdebe2c15d76d560a77)
SET(VERSION_DESCRIBE v22.12.1.1-testing)
SET(VERSION_STRING 22.12.1.1)
SET(VERSION_GITHASH 688e488e930c83eefeac4f87c4cc029cc5b231e3)
SET(VERSION_DESCRIBE v22.13.1.1-testing)
SET(VERSION_STRING 22.13.1.1)
# end of autochange

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_SYSTEM_NAME "Darwin")
set (CMAKE_SYSTEM_PROCESSOR "aarch64")
set (CMAKE_C_COMPILER_TARGET "aarch64-apple-darwin")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_SYSTEM_NAME "Darwin")
set (CMAKE_SYSTEM_PROCESSOR "x86_64")
set (CMAKE_C_COMPILER_TARGET "x86_64-apple-darwin")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_SYSTEM_NAME "FreeBSD")
set (CMAKE_SYSTEM_PROCESSOR "aarch64")
set (CMAKE_C_COMPILER_TARGET "aarch64-unknown-freebsd12")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_SYSTEM_NAME "FreeBSD")
set (CMAKE_SYSTEM_PROCESSOR "ppc64le")
set (CMAKE_C_COMPILER_TARGET "powerpc64le-unknown-freebsd13")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_SYSTEM_NAME "FreeBSD")
set (CMAKE_SYSTEM_PROCESSOR "x86_64")
set (CMAKE_C_COMPILER_TARGET "x86_64-pc-freebsd11")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
set (CMAKE_SYSTEM_NAME "Linux")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
set (CMAKE_SYSTEM_NAME "Linux")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
set (CMAKE_SYSTEM_NAME "Linux")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
set (CMAKE_SYSTEM_NAME "Linux")

View File

@ -1,18 +1,15 @@
if (_CLICKHOUSE_TOOLCHAIN_FILE_LOADED)
# During first run of cmake the toolchain file will be loaded twice,
# - /usr/share/cmake-3.23/Modules/CMakeDetermineSystem.cmake
# - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake
#
# But once you already have non-empty cmake cache it will be loaded only
# once:
# - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake
#
# This has no harm except for double load of toolchain will add
# --gcc-toolchain multiple times that will not allow ccache to reuse the
# cache.
return()
endif()
set (_CLICKHOUSE_TOOLCHAIN_FILE_LOADED ON)
# During first run of cmake the toolchain file will be loaded twice,
# - /usr/share/cmake-3.23/Modules/CMakeDetermineSystem.cmake
# - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake
#
# But once you already have non-empty cmake cache it will be loaded only
# once:
# - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake
#
# This has no harm except for double load of toolchain will add
# --gcc-toolchain multiple times that will not allow ccache to reuse the
# cache.
include_guard(GLOBAL)
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

View File

@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="22.11.2.30"
ARG VERSION="22.12.1.1752"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="22.11.2.30"
ARG VERSION="22.12.1.1752"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -127,23 +127,24 @@ EOL
function stop()
{
local max_tries=""
if [ -n "$1" ]
then
max_tries="--max-tries $1"
fi
local pid
# Preserve the pid, since the server can hung after the PID will be deleted.
pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
clickhouse stop $max_tries --do-not-kill && return
if [ -n "$1" ]
then
# temporarily disable it in BC check
clickhouse stop --force
return
fi
# We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces.
kill -TERM "$(pidof gdb)" ||:
sleep 5
echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
clickhouse stop --force
}
@ -431,7 +432,7 @@ else
clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables"
stop 180
stop 1
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log
# Start new server

View File

@ -0,0 +1,320 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.12.1.1752-stable (688e488e930) FIXME as compared to v22.11.1.1360-stable (0d211ed1984)
#### Backward Incompatible Change
* Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/41431 and affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Raúl Marín](https://github.com/Algunenano)).
#### New Feature
* Add "grace_hash" join_algorithm. [#38191](https://github.com/ClickHouse/ClickHouse/pull/38191) ([BigRedEye](https://github.com/BigRedEye)).
* Merging on initiator now uses the same memory bound approach as merging of local aggregation results if `enable_memory_bound_merging_of_aggregation_results` is set. [#40879](https://github.com/ClickHouse/ClickHouse/pull/40879) ([Nikita Taranov](https://github.com/nickitat)).
* Add BSONEachRow input/output format. In this format, ClickHouse formats/parses each row as a separated BSON Document and each column is formatted/parsed as a single BSON field with column name as a key. [#42033](https://github.com/ClickHouse/ClickHouse/pull/42033) ([mark-polokhov](https://github.com/mark-polokhov)).
* close: [#37631](https://github.com/ClickHouse/ClickHouse/issues/37631). [#42265](https://github.com/ClickHouse/ClickHouse/pull/42265) ([刘陶峰](https://github.com/taofengliu)).
* Added `multiplyDecimal` and `divideDecimal` functions for decimal operations with fixed precision. [#42438](https://github.com/ClickHouse/ClickHouse/pull/42438) ([Andrey Zvonov](https://github.com/zvonand)).
* Added `system.moves` table with list of currently moving parts. [#42660](https://github.com/ClickHouse/ClickHouse/pull/42660) ([Sergei Trifonov](https://github.com/serxa)).
* Keeper feature: add support for embedded Prometheus endpoint. [#43087](https://github.com/ClickHouse/ClickHouse/pull/43087) ([Antonio Andelic](https://github.com/antonio2368)).
* Added age function to calculate difference between two dates or dates with time values expressed as number of full units. Close [#41115](https://github.com/ClickHouse/ClickHouse/issues/41115). [#43123](https://github.com/ClickHouse/ClickHouse/pull/43123) ([Roman Vasin](https://github.com/rvasin)).
* Add settings `max_streams_for_merge_tree_reading` and `allow_asynchronous_read_from_io_pool_for_merge_tree`. Setting `max_streams_for_merge_tree_reading` limits the number of reading streams for MergeTree tables. Setting `allow_asynchronous_read_from_io_pool_for_merge_tree` enables background I/O pool to read from `MergeTree` tables. This may increase performance for I/O bound queries if used together with `max_streams_to_max_threads_ratio` or `max_streams_for_merge_tree_reading`. [#43260](https://github.com/ClickHouse/ClickHouse/pull/43260) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Add the expression of the index on `data_skipping_indices` system table. [#43308](https://github.com/ClickHouse/ClickHouse/pull/43308) ([Guillaume Tassery](https://github.com/YiuRULE)).
* New hash function [xxh3](https://github.com/Cyan4973/xxHash) added. Also performance of `xxHash32` and `xxHash64` improved on arm thanks to library update. [#43411](https://github.com/ClickHouse/ClickHouse/pull/43411) ([Nikita Taranov](https://github.com/nickitat)).
* - Temporary data (for external sorting, aggregation, and JOINs) can share storage with the filesystem cache for remote disks and evict it, close [#42158](https://github.com/ClickHouse/ClickHouse/issues/42158). [#43457](https://github.com/ClickHouse/ClickHouse/pull/43457) ([Vladimir C](https://github.com/vdimir)).
* Add column `engine_full` to system table `databases` so that users can access whole engine definition of database via system tables. [#43468](https://github.com/ClickHouse/ClickHouse/pull/43468) ([凌涛](https://github.com/lingtaolf)).
* Add password complexity rules and checks for creating a new user. [#43719](https://github.com/ClickHouse/ClickHouse/pull/43719) ([Nikolay Degterinsky](https://github.com/evillique)).
* Add function concatWithSeparator , like concat_ws in spark. [#43749](https://github.com/ClickHouse/ClickHouse/pull/43749) ([李扬](https://github.com/taiyang-li)).
* Added constraints for merge tree settings. [#43903](https://github.com/ClickHouse/ClickHouse/pull/43903) ([Sergei Trifonov](https://github.com/serxa)).
* Support numeric literals with _ as separator. [#43925](https://github.com/ClickHouse/ClickHouse/pull/43925) ([jh0x](https://github.com/jh0x)).
* Add a new setting `input_format_json_read_objects_as_strings` that allows to parse nested JSON objects into Strings in all JSON input formats. This setting is disable by default. [#44052](https://github.com/ClickHouse/ClickHouse/pull/44052) ([Kruglov Pavel](https://github.com/Avogar)).
#### Performance Improvement
* Optimisation is getting skipped now if `max_size_to_preallocate_for_aggregation` has too small value. Default value of this setting increased to `10^8`. [#43945](https://github.com/ClickHouse/ClickHouse/pull/43945) ([Nikita Taranov](https://github.com/nickitat)).
#### Improvement
* Support numeric literals with underscores. closes [#28967](https://github.com/ClickHouse/ClickHouse/issues/28967). [#39129](https://github.com/ClickHouse/ClickHouse/pull/39129) ([unbyte](https://github.com/unbyte)).
* Add `FROM table SELECT column` syntax. [#41095](https://github.com/ClickHouse/ClickHouse/pull/41095) ([Nikolay Degterinsky](https://github.com/evillique)).
* This PR changes how followed queries delete parts: truncate table, alter table drop part, alter table drop partition. Now these queries make empty parts which cover old parts. This makes truncate query works without exclusive lock which means concurrent reads aren't locked. Also achieved durability in all those queries. If request is succeeded then no resurrected pars appear later. Note that atomicity is achieved only with transaction scope. [#41145](https://github.com/ClickHouse/ClickHouse/pull/41145) ([Sema Checherinda](https://github.com/CheSema)).
* `SET param_x` query no longer requires manual string serialization for the value of the parameter. For example, query `SET param_a = '[\'a\', \'b\']'` can now be written like `SET param_a = ['a', 'b']`. [#41874](https://github.com/ClickHouse/ClickHouse/pull/41874) ([Nikolay Degterinsky](https://github.com/evillique)).
* `filesystemAvailable` and related functions support one optional argument with disk name, and change `filesystemFree` to `filesystemUnreserved`. Closes [#35076](https://github.com/ClickHouse/ClickHouse/issues/35076). [#42064](https://github.com/ClickHouse/ClickHouse/pull/42064) ([flynn](https://github.com/ucasfl)).
* Increased the default value of search_limit to 256, and added LDAP server config option to change that to an arbitrary value. Closes: [#42276](https://github.com/ClickHouse/ClickHouse/issues/42276). [#42461](https://github.com/ClickHouse/ClickHouse/pull/42461) ([Vasily Nemkov](https://github.com/Enmk)).
* Add cosine distance for annoy. [#42778](https://github.com/ClickHouse/ClickHouse/pull/42778) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
* Allow to remove sensitive information from the exception messages also. Resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#42940](https://github.com/ClickHouse/ClickHouse/pull/42940) ([filimonov](https://github.com/filimonov)).
* Keeper improvement: Add 4lw command `rqld` which can manually assign a node as leader. [#43026](https://github.com/ClickHouse/ClickHouse/pull/43026) ([JackyWoo](https://github.com/JackyWoo)).
* Apply connection timeouts settings for Distributed async INSERT from the query. [#43156](https://github.com/ClickHouse/ClickHouse/pull/43156) ([Azat Khuzhin](https://github.com/azat)).
* unhex function support FixedString arguments. [issue42369](https://github.com/ClickHouse/ClickHouse/issues/42369). [#43207](https://github.com/ClickHouse/ClickHouse/pull/43207) ([DR](https://github.com/freedomDR)).
* Priority is given to deleting completely expired Partsrelated [#42869](https://github.com/ClickHouse/ClickHouse/issues/42869). [#43222](https://github.com/ClickHouse/ClickHouse/pull/43222) ([zhongyuankai](https://github.com/zhongyuankai)).
* Follow-up to https://github.com/ClickHouse/ClickHouse/pull/42484. Mask sensitive information in logs better; mask secret parts in the output of queries `SHOW CREATE TABLE` and `SELECT FROM system.tables`. Also resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#43227](https://github.com/ClickHouse/ClickHouse/pull/43227) ([Vitaly Baranov](https://github.com/vitlibar)).
* Enable compress marks and primary key. [#43288](https://github.com/ClickHouse/ClickHouse/pull/43288) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* resolve issue [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075) . Right now async insert doesn't support deduplication, because multiple small inserts will coexist in one part, which corespond multiple `block id`s. This solution is straitfoward: The change involves: 1. mark offsets for every inserts in every chunk 2. calculate multiple `block_id`s when sinker receive a chunk 3. get block number lock by these `block_id`s 3.1. if fails, remove the dup insert(s) and dup `block_id`(s) from block and recalculate `offsets` agian. 3.2. if succeeds, commit `block_id`'s and other items into keeper a. if fails, do 3.1 b. if succeeds, everything succeeds. [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304) ([Han Fei](https://github.com/hanfei1991)).
* More precise and reactive CPU load indication on client. [#43307](https://github.com/ClickHouse/ClickHouse/pull/43307) ([Sergei Trifonov](https://github.com/serxa)).
* Restrict default access to named collections for user defined in config. It must have explicit `show_named_collections=1` to be able to see them. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Support reading of subcolumns of nested types from storage `S3` and table function `s3` with formats `Parquet`, `Arrow` and `ORC`. [#43329](https://github.com/ClickHouse/ClickHouse/pull/43329) ([chen](https://github.com/xiedeyantu)).
* - Systemd integration now correctly notifies systemd that service is really started and is ready to server requests. [#43400](https://github.com/ClickHouse/ClickHouse/pull/43400) ([Коренберг Марк](https://github.com/socketpair)).
* Add table_uuid to system.parts. [#43404](https://github.com/ClickHouse/ClickHouse/pull/43404) ([Azat Khuzhin](https://github.com/azat)).
* Added client option to display the number of locally processed rows in non-interactive mode (--print-num-processed-rows). [#43407](https://github.com/ClickHouse/ClickHouse/pull/43407) ([jh0x](https://github.com/jh0x)).
* Show read rows while reading from stdin from client. Closes [#43423](https://github.com/ClickHouse/ClickHouse/issues/43423). [#43442](https://github.com/ClickHouse/ClickHouse/pull/43442) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Keeper improvement: try syncing logs to disk in parallel with replication. [#43450](https://github.com/ClickHouse/ClickHouse/pull/43450) ([Antonio Andelic](https://github.com/antonio2368)).
* Show progress bar while reading from s3 table function / engine. [#43454](https://github.com/ClickHouse/ClickHouse/pull/43454) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Progress bar will show both read and written rows. [#43496](https://github.com/ClickHouse/ClickHouse/pull/43496) ([Ilya Yatsishin](https://github.com/qoega)).
* Implement `aggregation-in-order` optimization on top of query plan. It is enabled by default (but works only together with `optimize_aggregation_in_order`, which is disabled by default). Set `query_plan_aggregation_in_order = 0` to use previous AST-based version. [#43592](https://github.com/ClickHouse/ClickHouse/pull/43592) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Allow to send profile events with `trace_type = 'ProfileEvent'` to `system.trace_log` on each increment with current stack, profile event name and value of increment. It can be enabled by setting `trace_profile_events` and used to debug performance of queries. [#43639](https://github.com/ClickHouse/ClickHouse/pull/43639) ([Anton Popov](https://github.com/CurtizJ)).
* Keeper improvement: requests are batched more often. The batching can be controlled with the new setting `max_requests_quick_batch_size`. [#43686](https://github.com/ClickHouse/ClickHouse/pull/43686) ([Antonio Andelic](https://github.com/antonio2368)).
* Added possibility to use array as a second parameter for cutURLParameter function. Close [#6827](https://github.com/ClickHouse/ClickHouse/issues/6827). [#43788](https://github.com/ClickHouse/ClickHouse/pull/43788) ([Roman Vasin](https://github.com/rvasin)).
* Implement referential dependencies and use them to create tables in the correct order while restoring from a backup. [#43834](https://github.com/ClickHouse/ClickHouse/pull/43834) ([Vitaly Baranov](https://github.com/vitlibar)).
* Add a new setting `input_format_max_binary_string_size` to limit string size in RowBinary format. [#43842](https://github.com/ClickHouse/ClickHouse/pull/43842) ([Kruglov Pavel](https://github.com/Avogar)).
* - Fix some incorrect logic in ast level optimization related. [#43873](https://github.com/ClickHouse/ClickHouse/pull/43873) ([Duc Canh Le](https://github.com/canhld94)).
* Support query like `SHOW FULL TABLES ...`. [#43910](https://github.com/ClickHouse/ClickHouse/pull/43910) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
* When ClickHouse requests a remote HTTP server, and it returns an error, the numeric HTTP code was not displayed correctly in the exception message. Closes [#43919](https://github.com/ClickHouse/ClickHouse/issues/43919). [#43920](https://github.com/ClickHouse/ClickHouse/pull/43920) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem/merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem` did not respect adaptive granularity. Fat rows did not decrease the number of read rows (as it is was done for `merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read`, which could lead to high memory usage. [#43965](https://github.com/ClickHouse/ClickHouse/pull/43965) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Support `optimize_if_transform_strings_to_enum` in new analyzer. [#43999](https://github.com/ClickHouse/ClickHouse/pull/43999) ([Antonio Andelic](https://github.com/antonio2368)).
* This is to upgrade the new "DeflateQpl" compression codec which has been implemented on previous PR (details: https://github.com/ClickHouse/ClickHouse/pull/39494). This patch improves codec on below aspects: 1. QPL v0.2.0 to QPL v0.3.0 [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl) 2. Improve CMake file for fixing QPL build issues for QPL v0.3.0。 3. Link the QPL library with libaccel-config at build time instead of runtime loading on QPL v0.2.0 (dlopen) 4. Fixed log print issue in CompressionCodecDeflateQpl.cpp. [#44024](https://github.com/ClickHouse/ClickHouse/pull/44024) ([jasperzhu](https://github.com/jinjunzh)).
* Follow-up to https://github.com/ClickHouse/ClickHouse/pull/43834 Fix review issues; dependencies from `Distributed` table engine and from `cluster()` function are also considered now; as well as dependencies of a dictionary defined without host & port specified. [#44158](https://github.com/ClickHouse/ClickHouse/pull/44158) ([Vitaly Baranov](https://github.com/vitlibar)).
#### Bug Fix
* Fix mutations not making progress when checksums do not match between replicas (e.g. caused by a change in data format on an upgrade). [#36877](https://github.com/ClickHouse/ClickHouse/pull/36877) ([nvartolomei](https://github.com/nvartolomei)).
* fix skip_unavailable_shards does not work using hdfsCluster table function. [#43236](https://github.com/ClickHouse/ClickHouse/pull/43236) ([chen](https://github.com/xiedeyantu)).
* fix s3 support question mark wildcard. Closes [#42731](https://github.com/ClickHouse/ClickHouse/issues/42731). [#43253](https://github.com/ClickHouse/ClickHouse/pull/43253) ([chen](https://github.com/xiedeyantu)).
* - Fix functions arrayFirstOrNull and arrayLastOrNull or null when array is Nullable. [#43274](https://github.com/ClickHouse/ClickHouse/pull/43274) ([Duc Canh Le](https://github.com/canhld94)).
* - we create a new zk path called "async_blocks" for replicated tables in [#43304](https://github.com/ClickHouse/ClickHouse/issues/43304) . However, for tables created in older versions, this path does not exist and will cause error when doing partition operations. This PR will create this node when initializing replicated tree. - This PR created a flag `async_insert_deduplicate` with `false` default value to control whether to use this function. As mentioned in [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075) , this function is not yet fully finished. I would turn off it by default. [#44223](https://github.com/ClickHouse/ClickHouse/pull/44223) ([Han Fei](https://github.com/hanfei1991)).
#### Build/Testing/Packaging Improvement
* Add support for FreeBSD/powerpc64le. [#40422](https://github.com/ClickHouse/ClickHouse/pull/40422) ([pkubaj](https://github.com/pkubaj)).
* Bump Testcontainers for Go to v0.15.0. [#43278](https://github.com/ClickHouse/ClickHouse/pull/43278) ([Manuel de la Peña](https://github.com/mdelapenya)).
* ... Enable base64 on s390x > Information about CI checks: https://clickhouse.com/docs/en/development/continuous-integration/. [#43352](https://github.com/ClickHouse/ClickHouse/pull/43352) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
* Shutdown will be much faster if do not call clearOldPartsFromFilesystem. Especially this is right for tests with zero-copy due to single thread deletion parts. clearOldPartsFromFilesystem is unnecessary after https://github.com/ClickHouse/ClickHouse/pull/41145. [#43760](https://github.com/ClickHouse/ClickHouse/pull/43760) ([Sema Checherinda](https://github.com/CheSema)).
* Integrate skim into the client/local. [#43922](https://github.com/ClickHouse/ClickHouse/pull/43922) ([Azat Khuzhin](https://github.com/azat)).
* Allow clickhouse to use openssl as a dynamic library and in-tree for development purposes. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)).
* Closes [#43912](https://github.com/ClickHouse/ClickHouse/issues/43912). [#43992](https://github.com/ClickHouse/ClickHouse/pull/43992) ([Nikolay Degterinsky](https://github.com/evillique)).
* Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Kill stress tests after 2.5h in case of hanging process. [#44214](https://github.com/ClickHouse/ClickHouse/pull/44214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Fixed unable to log in (because of failure to create session_log entry) in rare case of messed up setting profiles. ... [#42641](https://github.com/ClickHouse/ClickHouse/pull/42641) ([Vasily Nemkov](https://github.com/Enmk)).
* Fix incorrect UserTimeMicroseconds/SystemTimeMicroseconds accounting. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)).
* Do not suppress exceptions in web disk. Fix retries for web disk. [#42800](https://github.com/ClickHouse/ClickHouse/pull/42800) ([Azat Khuzhin](https://github.com/azat)).
* Fixed race condition between inserts and dropping MVs. [#43161](https://github.com/ClickHouse/ClickHouse/pull/43161) ([AlfVII](https://github.com/AlfVII)).
* Fixed bug which could lead to deadlock while using asynchronous inserts. [#43233](https://github.com/ClickHouse/ClickHouse/pull/43233) ([Anton Popov](https://github.com/CurtizJ)).
* Additional check on zero uncompressed size is added to `CompressionCodecDelta`. [#43255](https://github.com/ClickHouse/ClickHouse/pull/43255) ([Nikita Taranov](https://github.com/nickitat)).
* An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix bad cast from LowCardinality column when using short circuit function execution. Proper fix of https://github.com/ClickHouse/ClickHouse/pull/42937. [#43311](https://github.com/ClickHouse/ClickHouse/pull/43311) ([Kruglov Pavel](https://github.com/Avogar)).
* Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix `DESCRIBE` for `deltaLake` and `hudi` table functions. [#43323](https://github.com/ClickHouse/ClickHouse/pull/43323) ([Antonio Andelic](https://github.com/antonio2368)).
* Check and compare the content of `format_version` file in `MergeTreeData` so tables can be loaded even if the storage policy was changed. [#43328](https://github.com/ClickHouse/ClickHouse/pull/43328) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix possible (very unlikely) "No column to rollback" logical error during INSERT into Buffer. [#43336](https://github.com/ClickHouse/ClickHouse/pull/43336) ([Azat Khuzhin](https://github.com/azat)).
* Fix a bug that allowed FucntionParser to parse an unlimited amount of round brackets into one function if `allow_function_parameters` is set. [#43350](https://github.com/ClickHouse/ClickHouse/pull/43350) ([Nikolay Degterinsky](https://github.com/evillique)).
* MaterializeMySQL support ddl drop table t1,t2 and Compatible with most of MySQL drop ddl. [#43366](https://github.com/ClickHouse/ClickHouse/pull/43366) ([zzsmdfj](https://github.com/zzsmdfj)).
* Fix possible `Cannot create non-empty column with type Nothing` in functions if/multiIf. Closes [#43356](https://github.com/ClickHouse/ClickHouse/issues/43356). [#43368](https://github.com/ClickHouse/ClickHouse/pull/43368) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix a bug when row level filter uses default value of column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)).
* Query with DISTINCT + LIMIT BY + LIMIT can return fewer rows than expected. Fixes [#43377](https://github.com/ClickHouse/ClickHouse/issues/43377). [#43410](https://github.com/ClickHouse/ClickHouse/pull/43410) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix sumMap() for Nullable(Decimal()). [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)).
* Fix date_diff() for hour/minute on macOS. Close [#42742](https://github.com/ClickHouse/ClickHouse/issues/42742). [#43466](https://github.com/ClickHouse/ClickHouse/pull/43466) ([zzsmdfj](https://github.com/zzsmdfj)).
* Fix incorrect memory accounting because of merges/mutations. [#43516](https://github.com/ClickHouse/ClickHouse/pull/43516) ([Azat Khuzhin](https://github.com/azat)).
* Substitute UDFs in `CREATE` query to avoid failures during loading at the startup. Additionally, UDFs can now be used as `DEFAULT` expressions for columns. [#43539](https://github.com/ClickHouse/ClickHouse/pull/43539) ([Antonio Andelic](https://github.com/antonio2368)).
* Correctly report errors in queries even when multiple JOINs optimization is taking place. [#43583](https://github.com/ClickHouse/ClickHouse/pull/43583) ([Salvatore](https://github.com/tbsal)).
* Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)).
* - Ensure consistency when copier update status and `attach_is_done` in keeper after partition attach is done. [#43602](https://github.com/ClickHouse/ClickHouse/pull/43602) ([lizhuoyu5](https://github.com/lzydmxy)).
* During recovering of the lost replica there could a situation where we need to atomically swap two table names (use EXCHANGE), but instead previously we tried to use two RENAME queries. Which was obviously failed and moreover failed the whole recovery process of the database replica. [#43628](https://github.com/ClickHouse/ClickHouse/pull/43628) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* fix s3Cluster function returns NOT_FOUND_COLUMN_IN_BLOCK error. Closes [#43534](https://github.com/ClickHouse/ClickHouse/issues/43534). [#43629](https://github.com/ClickHouse/ClickHouse/pull/43629) ([chen](https://github.com/xiedeyantu)).
* Optimized number of List requests to ZooKeeper when selecting a part to merge. Previously it could produce thousands of requests in some cases. Fixes [#43647](https://github.com/ClickHouse/ClickHouse/issues/43647). [#43675](https://github.com/ClickHouse/ClickHouse/pull/43675) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix posssible logical error 'Array sizes mismatched' while parsing JSON object with arrays with same key names but with different nesting level. Closes [#43569](https://github.com/ClickHouse/ClickHouse/issues/43569). [#43693](https://github.com/ClickHouse/ClickHouse/pull/43693) ([Kruglov Pavel](https://github.com/Avogar)).
* Fixed possible exception in case of distributed group by with an alias column among aggregation keys. [#43709](https://github.com/ClickHouse/ClickHouse/pull/43709) ([Nikita Taranov](https://github.com/nickitat)).
* Fix bug which can lead to broken projections if zero-copy replication is enabled and used. [#43764](https://github.com/ClickHouse/ClickHouse/pull/43764) ([alesapin](https://github.com/alesapin)).
* - Fix using multipart upload for large S3 objects in AWS S3. [#43824](https://github.com/ClickHouse/ClickHouse/pull/43824) ([ianton-ru](https://github.com/ianton-ru)).
* Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)).
* * Fix logical error in right storage join with using. [#43963](https://github.com/ClickHouse/ClickHouse/pull/43963) ([Vladimir C](https://github.com/vdimir)).
* Keeper fix: throw if interserver port for Raft is already in use. Fix segfault in Prometheus when Raft server failed to initialize. [#43984](https://github.com/ClickHouse/ClickHouse/pull/43984) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix order by positional arg in case unneeded columns pruning. Closes [#43964](https://github.com/ClickHouse/ClickHouse/issues/43964). [#43987](https://github.com/ClickHouse/ClickHouse/pull/43987) ([Kseniia Sumarokova](https://github.com/kssenii)).
* * Fix bug with wrong order of keys in Storage Join. [#44012](https://github.com/ClickHouse/ClickHouse/pull/44012) ([Vladimir C](https://github.com/vdimir)).
* Fixed exception when subquery contains having but doesn't contain actual aggregation. [#44051](https://github.com/ClickHouse/ClickHouse/pull/44051) ([Nikita Taranov](https://github.com/nickitat)).
* Fix race in s3 multipart upload. This race could cause the error `Part number must be an integer between 1 and 10000, inclusive. (S3_ERROR)` while restoring from a backup. [#44065](https://github.com/ClickHouse/ClickHouse/pull/44065) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Prevent dropping nested column if it creates empty part. [#44159](https://github.com/ClickHouse/ClickHouse/pull/44159) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix `LOGICAL_ERROR` in case when fetch of part was stopped while fetching projection to the disk with enabled zero-copy replication. [#44173](https://github.com/ClickHouse/ClickHouse/pull/44173) ([Anton Popov](https://github.com/CurtizJ)).
* Fix possible Bad cast from type DB::IAST const* to DB::ASTLiteral const*. Closes [#44191](https://github.com/ClickHouse/ClickHouse/issues/44191). [#44192](https://github.com/ClickHouse/ClickHouse/pull/44192) ([Kruglov Pavel](https://github.com/Avogar)).
* Prevent `ReadonlyReplica` metric from having negative values. [#44220](https://github.com/ClickHouse/ClickHouse/pull/44220) ([Antonio Andelic](https://github.com/antonio2368)).
#### Build Improvement
* Fixed Endian issues in hex string conversion on s390x (which is not supported by ClickHouse). [#41245](https://github.com/ClickHouse/ClickHouse/pull/41245) ([Harry Lee](https://github.com/HarryLeeIBM)).
* ... toDateTime64 conversion generates wrong time on z build, add bit_cast swap fix to support toDateTime64 on s390x platform. [#42847](https://github.com/ClickHouse/ClickHouse/pull/42847) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
* ... s390x support for ip coding functions. [#43078](https://github.com/ClickHouse/ClickHouse/pull/43078) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
* Fix byte order issue of wide integers for s390x. [#43228](https://github.com/ClickHouse/ClickHouse/pull/43228) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Fixed endian issue in bloom filter serialization for s390x. [#43642](https://github.com/ClickHouse/ClickHouse/pull/43642) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Fixed setting TCP_KEEPIDLE of client connection for s390x. [#43850](https://github.com/ClickHouse/ClickHouse/pull/43850) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Fix endian issue in StringHashTable for s390x. [#44049](https://github.com/ClickHouse/ClickHouse/pull/44049) ([Harry Lee](https://github.com/HarryLeeIBM)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Revert "S3 request per second rate throttling""'. [#43335](https://github.com/ClickHouse/ClickHouse/pull/43335) ([Sergei Trifonov](https://github.com/serxa)).
* NO CL ENTRY: 'Update version after release'. [#43348](https://github.com/ClickHouse/ClickHouse/pull/43348) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* NO CL ENTRY: 'Revert "Add table_uuid to system.parts"'. [#43571](https://github.com/ClickHouse/ClickHouse/pull/43571) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Fix endian issue in integer hex string conversion"'. [#43613](https://github.com/ClickHouse/ClickHouse/pull/43613) ([Vladimir C](https://github.com/vdimir)).
* NO CL ENTRY: 'Update replication.md'. [#43643](https://github.com/ClickHouse/ClickHouse/pull/43643) ([Peignon Melvyn](https://github.com/melvynator)).
* NO CL ENTRY: 'Revert "Temporary files evict fs cache"'. [#43883](https://github.com/ClickHouse/ClickHouse/pull/43883) ([Vladimir C](https://github.com/vdimir)).
* NO CL ENTRY: 'Update html interface doc'. [#44064](https://github.com/ClickHouse/ClickHouse/pull/44064) ([San](https://github.com/santrancisco)).
* NO CL ENTRY: 'Revert "Add function 'age'"'. [#44203](https://github.com/ClickHouse/ClickHouse/pull/44203) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Builtin skim"'. [#44227](https://github.com/ClickHouse/ClickHouse/pull/44227) ([Azat Khuzhin](https://github.com/azat)).
* NO CL ENTRY: 'Revert "Add information about written rows in progress indicator"'. [#44255](https://github.com/ClickHouse/ClickHouse/pull/44255) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Build libcxx and libcxxabi from llvm-project [#42730](https://github.com/ClickHouse/ClickHouse/pull/42730) ([Robert Schulze](https://github.com/rschu1ze)).
* Allow release only from ready commits [#43019](https://github.com/ClickHouse/ClickHouse/pull/43019) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add global flags to base/ libraries [#43082](https://github.com/ClickHouse/ClickHouse/pull/43082) ([Raúl Marín](https://github.com/Algunenano)).
* Enable strict typing check in tests/ci [#43132](https://github.com/ClickHouse/ClickHouse/pull/43132) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add server UUID for disks access checks (read/read-by-offset/write/delete) to avoid possible races [#43143](https://github.com/ClickHouse/ClickHouse/pull/43143) ([Azat Khuzhin](https://github.com/azat)).
* Do not include libcxx library for C [#43166](https://github.com/ClickHouse/ClickHouse/pull/43166) ([Azat Khuzhin](https://github.com/azat)).
* Followup fixes for FuseFunctionsPass [#43217](https://github.com/ClickHouse/ClickHouse/pull/43217) ([Vladimir C](https://github.com/vdimir)).
* Fix bug in replication queue which can lead to premature mutation finish [#43231](https://github.com/ClickHouse/ClickHouse/pull/43231) ([alesapin](https://github.com/alesapin)).
* Support `CREATE / ALTER / DROP NAMED COLLECTION` queries under according access types [#43252](https://github.com/ClickHouse/ClickHouse/pull/43252) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix race in `IColumn::dumpStructure` [#43269](https://github.com/ClickHouse/ClickHouse/pull/43269) ([Anton Popov](https://github.com/CurtizJ)).
* Sanitize thirdparty libraries for public flags [#43275](https://github.com/ClickHouse/ClickHouse/pull/43275) ([Azat Khuzhin](https://github.com/azat)).
* stress: increase timeout for server waiting after TERM [#43277](https://github.com/ClickHouse/ClickHouse/pull/43277) ([Azat Khuzhin](https://github.com/azat)).
* Fix cloning of ASTIdentifier [#43282](https://github.com/ClickHouse/ClickHouse/pull/43282) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix race on write in `ReplicatedMergeTree` [#43289](https://github.com/ClickHouse/ClickHouse/pull/43289) ([Antonio Andelic](https://github.com/antonio2368)).
* Cancel lambda api url [#43295](https://github.com/ClickHouse/ClickHouse/pull/43295) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fixed: Typo [#43312](https://github.com/ClickHouse/ClickHouse/pull/43312) ([Raevsky Rudolf](https://github.com/lanesket)).
* Analyzer small fixes [#43321](https://github.com/ClickHouse/ClickHouse/pull/43321) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix: make test_read_only_table more stable [#43326](https://github.com/ClickHouse/ClickHouse/pull/43326) ([Igor Nikonov](https://github.com/devcrafter)).
* Make insertRangeFrom() more exception safe [#43338](https://github.com/ClickHouse/ClickHouse/pull/43338) ([Azat Khuzhin](https://github.com/azat)).
* Analyzer added indexes support [#43341](https://github.com/ClickHouse/ClickHouse/pull/43341) ([Maksim Kita](https://github.com/kitaisreal)).
* Allow to "drop tables" from s3_plain disk (so as from web disk) [#43343](https://github.com/ClickHouse/ClickHouse/pull/43343) ([Azat Khuzhin](https://github.com/azat)).
* Add --max-consecutive-errors for clickhouse-benchmark [#43344](https://github.com/ClickHouse/ClickHouse/pull/43344) ([Azat Khuzhin](https://github.com/azat)).
* Add [#43072](https://github.com/ClickHouse/ClickHouse/issues/43072) [#43345](https://github.com/ClickHouse/ClickHouse/pull/43345) ([Nikita Taranov](https://github.com/nickitat)).
* Suggest users installation troubleshooting [#43346](https://github.com/ClickHouse/ClickHouse/pull/43346) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update version_date.tsv and changelogs after v22.11.1.1360-stable [#43349](https://github.com/ClickHouse/ClickHouse/pull/43349) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Provide full stacktrace in case of uncaught exception during server startup [#43364](https://github.com/ClickHouse/ClickHouse/pull/43364) ([Azat Khuzhin](https://github.com/azat)).
* Update SECURITY.md on new stable tags [#43365](https://github.com/ClickHouse/ClickHouse/pull/43365) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Splitting checks in CI more [#43373](https://github.com/ClickHouse/ClickHouse/pull/43373) ([alesapin](https://github.com/alesapin)).
* Update version_date.tsv and changelogs after v22.8.9.24-lts [#43393](https://github.com/ClickHouse/ClickHouse/pull/43393) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix mess with signed sizes in SingleValueDataString [#43401](https://github.com/ClickHouse/ClickHouse/pull/43401) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add a comment [#43403](https://github.com/ClickHouse/ClickHouse/pull/43403) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Avoid race condition for updating system.distribution_queue values [#43406](https://github.com/ClickHouse/ClickHouse/pull/43406) ([Azat Khuzhin](https://github.com/azat)).
* Fix flaky 01926_order_by_desc_limit [#43408](https://github.com/ClickHouse/ClickHouse/pull/43408) ([Azat Khuzhin](https://github.com/azat)).
* Fix possible heap-use-after-free in local if history file cannot be created [#43409](https://github.com/ClickHouse/ClickHouse/pull/43409) ([Azat Khuzhin](https://github.com/azat)).
* Fix flaky test [#43435](https://github.com/ClickHouse/ClickHouse/pull/43435) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix backward compatibility check [#43436](https://github.com/ClickHouse/ClickHouse/pull/43436) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix typo [#43446](https://github.com/ClickHouse/ClickHouse/pull/43446) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove noise from logs about NetLink in Docker [#43447](https://github.com/ClickHouse/ClickHouse/pull/43447) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Modify test slightly [#43448](https://github.com/ClickHouse/ClickHouse/pull/43448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Set run_passes to 1 by default [#43451](https://github.com/ClickHouse/ClickHouse/pull/43451) ([Dmitry Novik](https://github.com/novikd)).
* Do not reuse jemalloc memory in test_global_overcommit [#43453](https://github.com/ClickHouse/ClickHouse/pull/43453) ([Dmitry Novik](https://github.com/novikd)).
* Fix createTableSharedID again [#43458](https://github.com/ClickHouse/ClickHouse/pull/43458) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Use smaller buffer for small files [#43460](https://github.com/ClickHouse/ClickHouse/pull/43460) ([Alexander Gololobov](https://github.com/davenger)).
* Merging [#42064](https://github.com/ClickHouse/ClickHouse/issues/42064) [#43461](https://github.com/ClickHouse/ClickHouse/pull/43461) ([Anton Popov](https://github.com/CurtizJ)).
* Use all parameters with prefixes from ssm [#43467](https://github.com/ClickHouse/ClickHouse/pull/43467) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Avoid possible DROP hung due to attached web disk [#43489](https://github.com/ClickHouse/ClickHouse/pull/43489) ([Azat Khuzhin](https://github.com/azat)).
* Improve fuzzy search in clickhouse-client/clickhouse-local [#43498](https://github.com/ClickHouse/ClickHouse/pull/43498) ([Azat Khuzhin](https://github.com/azat)).
* check ast limits for create_parser_fuzzer [#43504](https://github.com/ClickHouse/ClickHouse/pull/43504) ([Sema Checherinda](https://github.com/CheSema)).
* Add another test for SingleDataValueString [#43514](https://github.com/ClickHouse/ClickHouse/pull/43514) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Move password reset message from client to server [#43517](https://github.com/ClickHouse/ClickHouse/pull/43517) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Sync everything to persistent storage to avoid writeback affects perf tests [#43530](https://github.com/ClickHouse/ClickHouse/pull/43530) ([Azat Khuzhin](https://github.com/azat)).
* bump lib for diag [#43538](https://github.com/ClickHouse/ClickHouse/pull/43538) ([Dale McDiarmid](https://github.com/gingerwizard)).
* Temporarily disable `test_hive_query` [#43542](https://github.com/ClickHouse/ClickHouse/pull/43542) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Analyzer SumIfToCountIfPass fix [#43543](https://github.com/ClickHouse/ClickHouse/pull/43543) ([Maksim Kita](https://github.com/kitaisreal)).
* Analyzer UniqInjectiveFunctionsEliminationPass [#43547](https://github.com/ClickHouse/ClickHouse/pull/43547) ([Maksim Kita](https://github.com/kitaisreal)).
* Disable broken 00176_bson_parallel_parsing [#43550](https://github.com/ClickHouse/ClickHouse/pull/43550) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add benchmark for query interpretation with JOINs [#43556](https://github.com/ClickHouse/ClickHouse/pull/43556) ([Raúl Marín](https://github.com/Algunenano)).
* Analyzer table functions untuple fix [#43572](https://github.com/ClickHouse/ClickHouse/pull/43572) ([Maksim Kita](https://github.com/kitaisreal)).
* Prepare CI for universal runners preallocated pool [#43579](https://github.com/ClickHouse/ClickHouse/pull/43579) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Iterate list without index-based access [#43584](https://github.com/ClickHouse/ClickHouse/pull/43584) ([Alexander Gololobov](https://github.com/davenger)).
* Remove code that I do not understand [#43593](https://github.com/ClickHouse/ClickHouse/pull/43593) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add table_uuid to system.parts (resubmit) [#43595](https://github.com/ClickHouse/ClickHouse/pull/43595) ([Azat Khuzhin](https://github.com/azat)).
* Move perf tests for Aarch64 from PRs to master [#43623](https://github.com/ClickHouse/ClickHouse/pull/43623) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix flaky 01175_distributed_ddl_output_mode_long [#43626](https://github.com/ClickHouse/ClickHouse/pull/43626) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Speedup backup config loading [#43627](https://github.com/ClickHouse/ClickHouse/pull/43627) ([Alexander Gololobov](https://github.com/davenger)).
* Fix [#43478](https://github.com/ClickHouse/ClickHouse/issues/43478) [#43636](https://github.com/ClickHouse/ClickHouse/pull/43636) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Do not checkout submodules recursively [#43637](https://github.com/ClickHouse/ClickHouse/pull/43637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Optimize binary-builder size [#43654](https://github.com/ClickHouse/ClickHouse/pull/43654) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix flaky `KeeperMap` integration tests [#43658](https://github.com/ClickHouse/ClickHouse/pull/43658) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix data race in `Keeper` snapshot [#43663](https://github.com/ClickHouse/ClickHouse/pull/43663) ([Antonio Andelic](https://github.com/antonio2368)).
* Use docker images cache from merged PRs in master and release branches [#43664](https://github.com/ClickHouse/ClickHouse/pull/43664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update AsynchronousReadIndirectBufferFromRemoteFS.cpp [#43667](https://github.com/ClickHouse/ClickHouse/pull/43667) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix pagination issue in GITHUB_JOB_ID() [#43681](https://github.com/ClickHouse/ClickHouse/pull/43681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Try fix flaky test 00176_bson_parallel_parsing [#43696](https://github.com/ClickHouse/ClickHouse/pull/43696) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix log messages in clickhouse-copier [#43707](https://github.com/ClickHouse/ClickHouse/pull/43707) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* try to remove clickhouse if already exists [#43728](https://github.com/ClickHouse/ClickHouse/pull/43728) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix 43622 [#43731](https://github.com/ClickHouse/ClickHouse/pull/43731) ([Amos Bird](https://github.com/amosbird)).
* Fix example of colored prompt in client [#43738](https://github.com/ClickHouse/ClickHouse/pull/43738) ([Azat Khuzhin](https://github.com/azat)).
* Minor fixes in annoy index documentation [#43743](https://github.com/ClickHouse/ClickHouse/pull/43743) ([Robert Schulze](https://github.com/rschu1ze)).
* Terminate lost runners [#43756](https://github.com/ClickHouse/ClickHouse/pull/43756) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update README.md [#43759](https://github.com/ClickHouse/ClickHouse/pull/43759) ([Tyler Hannan](https://github.com/tylerhannan)).
* Fix included_elements calculation in AggregateFunctionNullVariadic [#43763](https://github.com/ClickHouse/ClickHouse/pull/43763) ([Dmitry Novik](https://github.com/novikd)).
* Migrate runner_token_rotation_lambda to zip-package deployment [#43766](https://github.com/ClickHouse/ClickHouse/pull/43766) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Analyzer compound expression crash fix [#43768](https://github.com/ClickHouse/ClickHouse/pull/43768) ([Maksim Kita](https://github.com/kitaisreal)).
* Migrate termination lambda to zip-package [#43769](https://github.com/ClickHouse/ClickHouse/pull/43769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix flaky `test_store_cleanup` [#43770](https://github.com/ClickHouse/ClickHouse/pull/43770) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Attempt to fix StyleCheck condition [#43773](https://github.com/ClickHouse/ClickHouse/pull/43773) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Rerun PullRequestCI on changed description body [#43777](https://github.com/ClickHouse/ClickHouse/pull/43777) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Yet another fix for AggregateFunctionMinMaxAny [#43778](https://github.com/ClickHouse/ClickHouse/pull/43778) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add google benchmark to contrib [#43779](https://github.com/ClickHouse/ClickHouse/pull/43779) ([Nikita Taranov](https://github.com/nickitat)).
* Fix EN doc as in [#43765](https://github.com/ClickHouse/ClickHouse/issues/43765) [#43780](https://github.com/ClickHouse/ClickHouse/pull/43780) ([Alexander Gololobov](https://github.com/davenger)).
* Detach threads from thread group [#43781](https://github.com/ClickHouse/ClickHouse/pull/43781) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Try making `test_keeper_zookeeper_converter` less flaky [#43789](https://github.com/ClickHouse/ClickHouse/pull/43789) ([Antonio Andelic](https://github.com/antonio2368)).
* Polish UDF substitution visitor [#43790](https://github.com/ClickHouse/ClickHouse/pull/43790) ([Antonio Andelic](https://github.com/antonio2368)).
* Analyzer ConstantNode refactoring [#43793](https://github.com/ClickHouse/ClickHouse/pull/43793) ([Maksim Kita](https://github.com/kitaisreal)).
* Update Poco [#43802](https://github.com/ClickHouse/ClickHouse/pull/43802) ([Alexander Gololobov](https://github.com/davenger)).
* Add another BC check suppression [#43810](https://github.com/ClickHouse/ClickHouse/pull/43810) ([Alexander Tokmakov](https://github.com/tavplubix)).
* tests: fix 01676_long_clickhouse_client_autocomplete flakiness [#43819](https://github.com/ClickHouse/ClickHouse/pull/43819) ([Azat Khuzhin](https://github.com/azat)).
* Use disk operation to serialize and deserialize meta files of StorageFilelog [#43826](https://github.com/ClickHouse/ClickHouse/pull/43826) ([flynn](https://github.com/ucasfl)).
* Add constexpr [#43827](https://github.com/ClickHouse/ClickHouse/pull/43827) ([zhanglistar](https://github.com/zhanglistar)).
* Do not postpone removal of in-memory tables [#43833](https://github.com/ClickHouse/ClickHouse/pull/43833) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Increase some logging level for keeper client. [#43835](https://github.com/ClickHouse/ClickHouse/pull/43835) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* FuseFunctionsPass small fix [#43837](https://github.com/ClickHouse/ClickHouse/pull/43837) ([Maksim Kita](https://github.com/kitaisreal)).
* Followup fixes for XML helpers [#43845](https://github.com/ClickHouse/ClickHouse/pull/43845) ([Alexander Gololobov](https://github.com/davenger)).
* Hold ProcessListEntry a bit longer in case of exception from Interpreter [#43847](https://github.com/ClickHouse/ClickHouse/pull/43847) ([Alexander Tokmakov](https://github.com/tavplubix)).
* A little improve performance of PODArray [#43860](https://github.com/ClickHouse/ClickHouse/pull/43860) ([zhanglistar](https://github.com/zhanglistar)).
* Change email for robot-clickhouse to immutable one [#43861](https://github.com/ClickHouse/ClickHouse/pull/43861) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Rerun DocsCheck on edited PR description [#43862](https://github.com/ClickHouse/ClickHouse/pull/43862) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Temporarily disable misc-* slow clang-tidy checks [#43863](https://github.com/ClickHouse/ClickHouse/pull/43863) ([Robert Schulze](https://github.com/rschu1ze)).
* do not leave tmp part on disk, do not go to the keeper for remove it [#43866](https://github.com/ClickHouse/ClickHouse/pull/43866) ([Sema Checherinda](https://github.com/CheSema)).
* do not read part status just for logging [#43868](https://github.com/ClickHouse/ClickHouse/pull/43868) ([Sema Checherinda](https://github.com/CheSema)).
* Analyzer Context refactoring [#43884](https://github.com/ClickHouse/ClickHouse/pull/43884) ([Maksim Kita](https://github.com/kitaisreal)).
* Analyzer CTE resolution fix [#43893](https://github.com/ClickHouse/ClickHouse/pull/43893) ([Maksim Kita](https://github.com/kitaisreal)).
* Improve release script [#43894](https://github.com/ClickHouse/ClickHouse/pull/43894) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Use only PRs to our repository in pr_info on push [#43895](https://github.com/ClickHouse/ClickHouse/pull/43895) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Join engine works with analyzer [#43897](https://github.com/ClickHouse/ClickHouse/pull/43897) ([Vladimir C](https://github.com/vdimir)).
* Fix reports [#43904](https://github.com/ClickHouse/ClickHouse/pull/43904) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix vim settings (and make it compatible with neovim) [#43909](https://github.com/ClickHouse/ClickHouse/pull/43909) ([Azat Khuzhin](https://github.com/azat)).
* Fix clang tidy errors introduced in [#43834](https://github.com/ClickHouse/ClickHouse/issues/43834) [#43911](https://github.com/ClickHouse/ClickHouse/pull/43911) ([Nikita Taranov](https://github.com/nickitat)).
* Fix BACKUP TO S3 for Google Cloud Storage [#43940](https://github.com/ClickHouse/ClickHouse/pull/43940) ([Azat Khuzhin](https://github.com/azat)).
* Fix tags workflow [#43942](https://github.com/ClickHouse/ClickHouse/pull/43942) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Generate missed changelogs for latest releases [#43944](https://github.com/ClickHouse/ClickHouse/pull/43944) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix typo in tests/ci/bugfix_validate_check.py [#43973](https://github.com/ClickHouse/ClickHouse/pull/43973) ([Vladimir C](https://github.com/vdimir)).
* Remove test logging of signal "EINTR" [#44001](https://github.com/ClickHouse/ClickHouse/pull/44001) ([Kruglov Pavel](https://github.com/Avogar)).
* Some cleanup of isDeterministic(InScopeOfQuery)() [#44011](https://github.com/ClickHouse/ClickHouse/pull/44011) ([Robert Schulze](https://github.com/rschu1ze)).
* Try to keep runners alive for longer [#44015](https://github.com/ClickHouse/ClickHouse/pull/44015) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix relaxed "too many parts" threshold [#44021](https://github.com/ClickHouse/ClickHouse/pull/44021) ([Sergei Trifonov](https://github.com/serxa)).
* Correct CompressionCodecGorilla exception message [#44023](https://github.com/ClickHouse/ClickHouse/pull/44023) ([Duc Canh Le](https://github.com/canhld94)).
* Fix exception message [#44034](https://github.com/ClickHouse/ClickHouse/pull/44034) ([Nikolay Degterinsky](https://github.com/evillique)).
* Update version_date.tsv and changelogs after v22.8.11.15-lts [#44035](https://github.com/ClickHouse/ClickHouse/pull/44035) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* do not hardlink serialization.json in new part [#44036](https://github.com/ClickHouse/ClickHouse/pull/44036) ([Sema Checherinda](https://github.com/CheSema)).
* Fix tracing of profile events [#44045](https://github.com/ClickHouse/ClickHouse/pull/44045) ([Anton Popov](https://github.com/CurtizJ)).
* Slightly better clickhouse disks and remove DiskMemory [#44050](https://github.com/ClickHouse/ClickHouse/pull/44050) ([alesapin](https://github.com/alesapin)).
* Assign release PRs [#44055](https://github.com/ClickHouse/ClickHouse/pull/44055) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Merging [#36877](https://github.com/ClickHouse/ClickHouse/issues/36877) [#44059](https://github.com/ClickHouse/ClickHouse/pull/44059) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* add changelogs [#44061](https://github.com/ClickHouse/ClickHouse/pull/44061) ([Dan Roscigno](https://github.com/DanRoscigno)).
* Fix the CACHE_PATH creation for default value [#44079](https://github.com/ClickHouse/ClickHouse/pull/44079) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix aspell [#44090](https://github.com/ClickHouse/ClickHouse/pull/44090) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix another bug in AggregateFunctionMinMaxAny [#44091](https://github.com/ClickHouse/ClickHouse/pull/44091) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Analyzer aggregate function lambda crash fix [#44098](https://github.com/ClickHouse/ClickHouse/pull/44098) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix -Wshorten-64-to-32 on FreeBSD and enable -Werror [#44121](https://github.com/ClickHouse/ClickHouse/pull/44121) ([Azat Khuzhin](https://github.com/azat)).
* Fix flaky test `02497_trace_events_stress_long` [#44124](https://github.com/ClickHouse/ClickHouse/pull/44124) ([Anton Popov](https://github.com/CurtizJ)).
* Minor file renaming [#44125](https://github.com/ClickHouse/ClickHouse/pull/44125) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix typo [#44127](https://github.com/ClickHouse/ClickHouse/pull/44127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Better descriptions of signals [#44129](https://github.com/ClickHouse/ClickHouse/pull/44129) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* make calls to be sure that parts are deleted [#44156](https://github.com/ClickHouse/ClickHouse/pull/44156) ([Sema Checherinda](https://github.com/CheSema)).
* Ignore "session expired" errors after BC check [#44157](https://github.com/ClickHouse/ClickHouse/pull/44157) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix incorrect assertion [#44160](https://github.com/ClickHouse/ClickHouse/pull/44160) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Close GRPC channels in tests [#44184](https://github.com/ClickHouse/ClickHouse/pull/44184) ([Antonio Andelic](https://github.com/antonio2368)).
* Remove misleading message from logs [#44190](https://github.com/ClickHouse/ClickHouse/pull/44190) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Minor clang-tidy fixes in fromUnixTimestamp64() [#44194](https://github.com/ClickHouse/ClickHouse/pull/44194) ([Igor Nikonov](https://github.com/devcrafter)).
* Hotfix for "check_status.tsv doesn't exists" in stress tests [#44197](https://github.com/ClickHouse/ClickHouse/pull/44197) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix documentation after [#42438](https://github.com/ClickHouse/ClickHouse/issues/42438) [#44200](https://github.com/ClickHouse/ClickHouse/pull/44200) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix an assertion in transactions [#44202](https://github.com/ClickHouse/ClickHouse/pull/44202) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add log message [#44237](https://github.com/ClickHouse/ClickHouse/pull/44237) ([Alexander Tokmakov](https://github.com/tavplubix)).

View File

@ -0,0 +1,37 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.9.7.34-stable (613fe09ca2e) FIXME as compared to v22.9.6.20-stable (ef6343f9579)
#### Bug Fix
* Backported in [#43099](https://github.com/ClickHouse/ClickHouse/issues/43099): Updated normaliser to clone the alias ast. resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
#### Build/Testing/Packaging Improvement
* Backported in [#44111](https://github.com/ClickHouse/ClickHouse/issues/44111): Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#43612](https://github.com/ClickHouse/ClickHouse/issues/43612): Fix bad inefficiency of `remote_filesystem_read_method=read` with filesystem cache. Closes [#42125](https://github.com/ClickHouse/ClickHouse/issues/42125). [#42129](https://github.com/ClickHouse/ClickHouse/pull/42129) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#43526](https://github.com/ClickHouse/ClickHouse/issues/43526): Fix incorrect UserTimeMicroseconds/SystemTimeMicroseconds accounting. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#43518](https://github.com/ClickHouse/ClickHouse/issues/43518): Fix rare possible hung on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#43752](https://github.com/ClickHouse/ClickHouse/issues/43752): An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#43618](https://github.com/ClickHouse/ClickHouse/issues/43618): Fix sumMap() for Nullable(Decimal()). [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#43887](https://github.com/ClickHouse/ClickHouse/issues/43887): Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)).
* Backported in [#44145](https://github.com/ClickHouse/ClickHouse/issues/44145): Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NO CL ENTRY
* NO CL ENTRY: 'Fix multipart upload for large S3 object, backport to 22.9'. [#44219](https://github.com/ClickHouse/ClickHouse/pull/44219) ([ianton-ru](https://github.com/ianton-ru)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Yet another fix for AggregateFunctionMinMaxAny [#43778](https://github.com/ClickHouse/ClickHouse/pull/43778) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Use only PRs to our repository in pr_info on push [#43895](https://github.com/ClickHouse/ClickHouse/pull/43895) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix tags workflow [#43942](https://github.com/ClickHouse/ClickHouse/pull/43942) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Assign release PRs [#44055](https://github.com/ClickHouse/ClickHouse/pull/44055) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix another bug in AggregateFunctionMinMaxAny [#44091](https://github.com/ClickHouse/ClickHouse/pull/44091) ([Alexander Tokmakov](https://github.com/tavplubix)).

View File

@ -92,7 +92,7 @@ Code: 452, e.displayText() = DB::Exception: Setting force_index_by_date should n
**Note:** the `default` profile has special handling: all the constraints defined for the `default` profile become the default constraints, so they restrict all the users until theyre overridden explicitly for these users.
## Constraints on Merge Tree Settings
It is possible to set constraints for [merge tree settings](merge-tree-settings.md). There constraints are applied when table with merge tree engine is created or its storage settings are altered. Name of merge tree setting must be prepended by `merge_tree_` prefix when referenced in `<constraint>` section.
It is possible to set constraints for [merge tree settings](merge-tree-settings.md). These constraints are applied when table with merge tree engine is created or its storage settings are altered. Name of merge tree setting must be prepended by `merge_tree_` prefix when referenced in `<constraints>` section.
**Example:** Forbid to create new tables with explicitly specified `storage_policy`

View File

@ -3588,6 +3588,24 @@ y Nullable(String)
z IPv4
```
## input_format_try_infer_integers {#input_format_try_infer_integers}
If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from input data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`.
Enabled by default.
## input_format_try_infer_dates {#input_format_try_infer_dates}
If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as dates, the result type will be `Date`, if at least one field was not parsed as date, the result type will be `String`.
Enabled by default.
## input_format_try_infer_datetimes {#input_format_try_infer_datetimes}
If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as datetimes, the result type will be `DateTime64`, if at least one field was not parsed as datetime, the result type will be `String`.
Enabled by default.
## date_time_input_format {#date_time_input_format}
Allows choosing a parser of the text representation of date and time.

View File

@ -607,3 +607,7 @@ dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1')))
Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned.
Data must completely fit into RAM.
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -136,3 +136,7 @@ or
SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15))
...
```
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -824,3 +824,7 @@ Setting fields:
:::note
The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
:::
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -176,3 +176,6 @@ Configuration fields:
- [Functions for working with dictionaries](../../../sql-reference/functions/ext-dict-functions.md).
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -52,3 +52,6 @@ LIFETIME(...) -- Lifetime of dictionary in memory
- [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md) — Structure of the dictionary . A key and attributes that can be retrieved by this key.
- [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) — Frequency of dictionary updates.
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -79,3 +79,6 @@ You can convert values for a small dictionary by describing it in a `SELECT` que
- [Dictionary Key and Fields](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md)
- [Functions for Working with Dictionaries](../../../sql-reference/functions/ext-dict-functions.md)
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -296,3 +296,7 @@ Another example is the `hostName` function, which returns the name of the server
If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in an any aggregate function or add it to a key in `GROUP BY`.
## Related Content
- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)

View File

@ -58,3 +58,7 @@ Result:
│ 2 │ even │
└────────┴──────────────────────────────────────┘
```
## Related Content
- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)

View File

@ -37,7 +37,7 @@
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <TableFunctions/registerTableFunctions.h>
#include <Storages/registerStorages.h>
#include <Storages/NamedCollectionUtils.h>
#include <Storages/NamedCollections/NamedCollectionUtils.h>
#include <Dictionaries/registerDictionaries.h>
#include <Disks/registerDisks.h>
#include <Formats/registerFormats.h>

View File

@ -60,7 +60,7 @@
#include <Storages/System/attachInformationSchemaTables.h>
#include <Storages/Cache/ExternalDataSourceCache.h>
#include <Storages/Cache/registerRemoteFileMetadatas.h>
#include <Storages/NamedCollectionUtils.h>
#include <Storages/NamedCollections/NamedCollectionUtils.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
#include <Functions/registerFunctions.h>

View File

@ -86,6 +86,49 @@ void SettingsConstraints::merge(const SettingsConstraints & other)
}
void SettingsConstraints::check(const Settings & current_settings, const SettingsProfileElements & profile_elements) const
{
for (const auto & element : profile_elements)
{
if (SettingsProfileElements::isAllowBackupSetting(element.setting_name))
continue;
if (!element.value.isNull())
{
SettingChange value(element.setting_name, element.value);
check(current_settings, value);
}
if (!element.min_value.isNull())
{
SettingChange value(element.setting_name, element.min_value);
check(current_settings, value);
}
if (!element.max_value.isNull())
{
SettingChange value(element.setting_name, element.max_value);
check(current_settings, value);
}
SettingConstraintWritability new_value = SettingConstraintWritability::WRITABLE;
SettingConstraintWritability old_value = SettingConstraintWritability::WRITABLE;
if (element.writability)
new_value = *element.writability;
auto it = constraints.find(element.setting_name);
if (it != constraints.end())
old_value = it->second.writability;
if (new_value != old_value)
{
if (old_value == SettingConstraintWritability::CONST)
throw Exception("Setting " + element.setting_name + " should not be changed", ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
}
}
}
void SettingsConstraints::check(const Settings & current_settings, const SettingChange & change) const
{
checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION);

View File

@ -73,6 +73,7 @@ public:
void merge(const SettingsConstraints & other);
/// Checks whether `change` violates these constraints and throws an exception if so.
void check(const Settings & current_settings, const SettingsProfileElements & profile_elements) const;
void check(const Settings & current_settings, const SettingChange & change) const;
void check(const Settings & current_settings, const SettingsChanges & changes) const;
void check(const Settings & current_settings, SettingsChanges & changes) const;

View File

@ -248,4 +248,9 @@ bool SettingsProfileElements::isBackupAllowed() const
return true;
}
bool SettingsProfileElements::isAllowBackupSetting(const String & setting_name)
{
return setting_name == ALLOW_BACKUP_SETTING_NAME;
}
}

View File

@ -71,6 +71,8 @@ public:
std::vector<UUID> toProfileIDs() const;
bool isBackupAllowed() const;
static bool isAllowBackupSetting(const String & setting_name);
};
}

View File

@ -0,0 +1,647 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <Common/HashTable/HashMap.h>
#include <Common/SymbolIndex.h>
#include <Common/ArenaAllocator.h>
#include <Core/Settings.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <filesystem>
namespace DB
{
namespace ErrorCodes
{
extern const int FUNCTION_NOT_ALLOWED;
extern const int NOT_IMPLEMENTED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
struct AggregateFunctionFlameGraphTree
{
struct ListNode;
struct TreeNode
{
TreeNode * parent = nullptr;
ListNode * children = nullptr;
UInt64 ptr = 0;
size_t allocated = 0;
};
struct ListNode
{
ListNode * next = nullptr;
TreeNode * child = nullptr;
};
TreeNode root;
static ListNode * createChild(TreeNode * parent, UInt64 ptr, Arena * arena)
{
ListNode * list_node = reinterpret_cast<ListNode *>(arena->alloc(sizeof(ListNode)));
TreeNode * tree_node = reinterpret_cast<TreeNode *>(arena->alloc(sizeof(TreeNode)));
list_node->child = tree_node;
list_node->next = nullptr;
tree_node->parent =parent;
tree_node->children = nullptr;
tree_node->ptr = ptr;
tree_node->allocated = 0;
return list_node;
}
TreeNode * find(const UInt64 * stack, size_t stack_size, Arena * arena)
{
TreeNode * node = &root;
for (size_t i = 0; i < stack_size; ++i)
{
UInt64 ptr = stack[i];
if (ptr == 0)
break;
if (!node->children)
{
node->children = createChild(node, ptr, arena);
node = node->children->child;
}
else
{
ListNode * list = node->children;
while (list->child->ptr != ptr && list->next)
list = list->next;
if (list->child->ptr != ptr)
{
list->next = createChild(node, ptr, arena);
list = list->next;
}
node = list->child;
}
}
return node;
}
static void append(DB::PaddedPODArray<UInt64> & values, DB::PaddedPODArray<UInt64> & offsets, std::vector<UInt64> & frame)
{
UInt64 prev = offsets.empty() ? 0 : offsets.back();
offsets.push_back(prev + frame.size());
for (UInt64 val : frame)
values.push_back(val);
}
struct Trace
{
using Frames = std::vector<UInt64>;
Frames frames;
/// The total number of bytes allocated for traces with the same prefix.
size_t allocated_total = 0;
/// This counter is relevant in case we want to filter some traces with small amount of bytes.
/// It shows the total number of bytes for *filtered* traces with the same prefix.
/// This is the value which is used in flamegraph.
size_t allocated_self = 0;
};
using Traces = std::vector<Trace>;
Traces dump(size_t max_depth, size_t min_bytes) const
{
Traces traces;
Trace::Frames frames;
std::vector<size_t> allocated_total;
std::vector<size_t> allocated_self;
std::vector<ListNode *> nodes;
nodes.push_back(root.children);
allocated_total.push_back(root.allocated);
allocated_self.push_back(root.allocated);
while (!nodes.empty())
{
if (nodes.back() == nullptr)
{
traces.push_back({frames, allocated_total.back(), allocated_self.back()});
nodes.pop_back();
allocated_total.pop_back();
allocated_self.pop_back();
/// We don't have root's frame so framers are empty in the end.
if (!frames.empty())
frames.pop_back();
continue;
}
TreeNode * current = nodes.back()->child;
nodes.back() = nodes.back()->next;
bool enough_bytes = current->allocated >= min_bytes;
bool enough_depth = max_depth == 0 || nodes.size() < max_depth;
if (enough_bytes)
{
frames.push_back(current->ptr);
allocated_self.back() -= current->allocated;
if (enough_depth)
{
allocated_total.push_back(current->allocated);
allocated_self.push_back(current->allocated);
nodes.push_back(current->children);
}
else
{
traces.push_back({frames, current->allocated, current->allocated});
frames.pop_back();
}
}
}
return traces;
}
};
static void insertData(DB::PaddedPODArray<UInt8> & chars, DB::PaddedPODArray<UInt64> & offsets, const char * pos, size_t length)
{
const size_t old_size = chars.size();
const size_t new_size = old_size + length + 1;
chars.resize(new_size);
if (length)
memcpy(chars.data() + old_size, pos, length);
chars[old_size + length] = 0;
offsets.push_back(new_size);
}
/// Split str by line feed and write as separate row to ColumnString.
static void fillColumn(DB::PaddedPODArray<UInt8> & chars, DB::PaddedPODArray<UInt64> & offsets, const std::string & str)
{
size_t start = 0;
size_t end = 0;
size_t size = str.size();
while (end < size)
{
if (str[end] == '\n')
{
insertData(chars, offsets, str.data() + start, end - start);
start = end + 1;
}
++end;
}
if (start < end)
insertData(chars, offsets, str.data() + start, end - start);
}
void dumpFlameGraph(
const AggregateFunctionFlameGraphTree::Traces & traces,
DB::PaddedPODArray<UInt8> & chars,
DB::PaddedPODArray<UInt64> & offsets)
{
DB::WriteBufferFromOwnString out;
std::unordered_map<uintptr_t, size_t> mapping;
#if defined(__ELF__) && !defined(OS_FREEBSD)
auto symbol_index_ptr = DB::SymbolIndex::instance();
const DB::SymbolIndex & symbol_index = *symbol_index_ptr;
#endif
for (const auto & trace : traces)
{
if (trace.allocated_self == 0)
continue;
for (size_t i = 0; i < trace.frames.size(); ++i)
{
if (i)
out << ";";
const void * ptr = reinterpret_cast<const void *>(trace.frames[i]);
#if defined(__ELF__) && !defined(OS_FREEBSD)
if (const auto * symbol = symbol_index.findSymbol(ptr))
writeString(demangle(symbol->name), out);
else
DB::writePointerHex(ptr, out);
#else
DB::writePointerHex(ptr, out);
#endif
}
out << ' ' << trace.allocated_self << "\n";
}
fillColumn(chars, offsets, out.str());
}
struct AggregateFunctionFlameGraphData
{
struct Entry
{
AggregateFunctionFlameGraphTree::TreeNode * trace;
UInt64 size;
Entry * next = nullptr;
};
struct Pair
{
Entry * allocation = nullptr;
Entry * deallocation = nullptr;
};
using Entries = HashMap<UInt64, Pair>;
AggregateFunctionFlameGraphTree tree;
Entries entries;
Entry * free_list = nullptr;
Entry * alloc(Arena * arena)
{
if (free_list)
{
auto * res = free_list;
free_list = free_list->next;
return res;
}
return reinterpret_cast<Entry *>(arena->alloc(sizeof(Entry)));
}
void release(Entry * entry)
{
entry->next = free_list;
free_list = entry;
}
static void track(Entry * allocation)
{
auto * node = allocation->trace;
while (node)
{
node->allocated += allocation->size;
node = node->parent;
}
}
static void untrack(Entry * allocation)
{
auto * node = allocation->trace;
while (node)
{
node->allocated -= allocation->size;
node = node->parent;
}
}
static Entry * tryFindMatchAndRemove(Entry *& list, UInt64 size)
{
if (!list)
return nullptr;
if (list->size == size)
{
Entry * entry = list;
list = list->next;
return entry;
}
else
{
Entry * parent = list;
while (parent->next && parent->next->size != size)
parent = parent->next;
if (parent->next && parent->next->size == size)
{
Entry * entry = parent->next;
parent->next = entry->next;
return entry;
}
return nullptr;
}
}
void add(UInt64 ptr, Int64 size, const UInt64 * stack, size_t stack_size, Arena * arena)
{
/// In case if argument is nullptr, only track allocations.
if (ptr == 0)
{
if (size > 0)
{
auto * node = tree.find(stack, stack_size, arena);
Entry entry{.trace = node, .size = UInt64(size)};
track(&entry);
}
return;
}
auto & place = entries[ptr];
if (size > 0)
{
if (auto * deallocation = tryFindMatchAndRemove(place.deallocation, size))
{
release(deallocation);
}
else
{
auto * node = tree.find(stack, stack_size, arena);
auto * allocation = alloc(arena);
allocation->size = UInt64(size);
allocation->trace = node;
track(allocation);
allocation->next = place.allocation;
place.allocation = allocation;
}
}
else if (size < 0)
{
UInt64 abs_size = -size;
if (auto * allocation = tryFindMatchAndRemove(place.allocation, abs_size))
{
untrack(allocation);
release(allocation);
}
else
{
auto * deallocation = alloc(arena);
deallocation->size = abs_size;
deallocation->next = place.deallocation;
place.deallocation = deallocation;
}
}
}
void merge(const AggregateFunctionFlameGraphTree & other_tree, Arena * arena)
{
AggregateFunctionFlameGraphTree::Trace::Frames frames;
std::vector<AggregateFunctionFlameGraphTree::ListNode *> nodes;
nodes.push_back(other_tree.root.children);
while (!nodes.empty())
{
if (nodes.back() == nullptr)
{
nodes.pop_back();
/// We don't have root's frame so framers are empty in the end.
if (!frames.empty())
frames.pop_back();
continue;
}
AggregateFunctionFlameGraphTree::TreeNode * current = nodes.back()->child;
nodes.back() = nodes.back()->next;
frames.push_back(current->ptr);
if (current->children)
nodes.push_back(current->children);
else
{
if (current->allocated)
add(0, current->allocated, frames.data(), frames.size(), arena);
frames.pop_back();
}
}
}
void merge(const AggregateFunctionFlameGraphData & other, Arena * arena)
{
AggregateFunctionFlameGraphTree::Trace::Frames frames;
for (const auto & entry : other.entries)
{
for (auto * allocation = entry.value.second.allocation; allocation; allocation = allocation->next)
{
frames.clear();
const auto * node = allocation->trace;
while (node->ptr)
{
frames.push_back(node->ptr);
node = node->parent;
}
std::reverse(frames.begin(), frames.end());
add(entry.value.first, allocation->size, frames.data(), frames.size(), arena);
untrack(allocation);
}
for (auto * deallocation = entry.value.second.deallocation; deallocation; deallocation = deallocation->next)
{
add(entry.value.first, -Int64(deallocation->size), nullptr, 0, arena);
}
}
merge(other.tree, arena);
}
void dumpFlameGraph(
DB::PaddedPODArray<UInt8> & chars,
DB::PaddedPODArray<UInt64> & offsets,
size_t max_depth, size_t min_bytes) const
{
DB::dumpFlameGraph(tree.dump(max_depth, min_bytes), chars, offsets);
}
};
/// Aggregate function which builds a flamegraph using the list of stacktraces.
/// The output is an array of strings which can be used by flamegraph.pl util.
/// See https://github.com/brendangregg/FlameGraph
///
/// Syntax: flameGraph(traces, [size = 1], [ptr = 0])
/// - trace : Array(UInt64), a stacktrace
/// - size : Int64, an allocation size (for memory profiling)
/// - ptr : UInt64, an allocation address
/// In case if ptr != 0, a flameGraph will map allocations (size > 0) and deallocations (size < 0) with the same size and ptr.
/// Only allocations which were not freed are shown. Not mapped deallocations are ignored.
///
/// Usage:
///
/// * Build a flamegraph based on CPU query profiler
/// set query_profiler_cpu_time_period_ns=10000000;
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(arrayReverse(trace))) from system.trace_log where trace_type = 'CPU' and query_id = 'xxx'"
/// | ~/dev/FlameGraph/flamegraph.pl > flame_cpu.svg
///
/// * Build a flamegraph based on memory query profiler, showing all allocations
/// set memory_profiler_sample_probability=1, max_untracked_memory=1;
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(trace, size)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'"
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem.svg
///
/// * Build a flamegraph based on memory query profiler, showing allocations which were not deallocated in query context
/// set memory_profiler_sample_probability=1, max_untracked_memory=1, use_uncompressed_cache=1, merge_tree_max_rows_to_use_cache=100000000000, merge_tree_max_bytes_to_use_cache=1000000000000;
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(trace, size, ptr)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'"
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_untracked.svg
///
/// * Build a flamegraph based on memory query profiler, showing active allocations at the fixed point of time
/// set memory_profiler_sample_probability=1, max_untracked_memory=1;
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
/// 1. Memory usage per second
/// select event_time, m, formatReadableSize(max(s) as m) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample') group by event_time order by event_time;
/// 2. Find a time point with maximal memory usage
/// select argMax(event_time, s), max(s) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample');
/// 3. Fix active allocations at fixed point of time
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(trace, size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time <= 'yyy' order by event_time)"
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_pos.svg
/// 4. Find deallocations at fixed point of time
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(trace, -size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time > 'yyy' order by event_time desc)"
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_neg.svg
class AggregateFunctionFlameGraph final : public IAggregateFunctionDataHelper<AggregateFunctionFlameGraphData, AggregateFunctionFlameGraph>
{
public:
explicit AggregateFunctionFlameGraph(const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<AggregateFunctionFlameGraphData, AggregateFunctionFlameGraph>(argument_types_, {})
{}
String getName() const override { return "flameGraph"; }
DataTypePtr getReturnType() const override
{
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
}
bool allocatesMemoryInArena() const override { return true; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
const auto * trace = typeid_cast<const ColumnArray *>(columns[0]);
const auto & trace_offsets = trace->getOffsets();
const auto & trace_values = typeid_cast<const ColumnUInt64 *>(&trace->getData())->getData();
UInt64 prev_offset = 0;
if (row_num)
prev_offset = trace_offsets[row_num - 1];
UInt64 trace_size = trace_offsets[row_num] - prev_offset;
Int64 allocated = 1;
if (argument_types.size() >= 2)
{
const auto & sizes = typeid_cast<const ColumnInt64 *>(columns[1])->getData();
allocated = sizes[row_num];
}
UInt64 ptr = 0;
if (argument_types.size() >= 3)
{
const auto & ptrs = typeid_cast<const ColumnUInt64 *>(columns[2])->getData();
ptr = ptrs[row_num];
}
this->data(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena);
}
void addManyDefaults(
AggregateDataPtr __restrict /*place*/,
const IColumn ** /*columns*/,
size_t /*length*/,
Arena * /*arena*/) const override
{
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
this->data(place).merge(this->data(rhs), arena);
}
void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional<size_t> /* version */) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Serialization for function flameGraph is not implemented.");
}
void deserialize(AggregateDataPtr __restrict, ReadBuffer &, std::optional<size_t> /* version */, Arena *) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Deserialization for function flameGraph is not implemented.");
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & array = assert_cast<ColumnArray &>(to);
auto & str = assert_cast<ColumnString &>(array.getData());
this->data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0);
array.getOffsets().push_back(str.size());
}
};
static void check(const std::string & name, const DataTypes & argument_types, const Array & params)
{
assertNoParameters(name, params);
if (argument_types.empty() || argument_types.size() > 3)
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Aggregate function {} requires 1 to 3 arguments : trace, [size = 1], [ptr = 0]",
name);
auto ptr_type = std::make_shared<DataTypeUInt64>();
auto trace_type = std::make_shared<DataTypeArray>(ptr_type);
auto size_type = std::make_shared<DataTypeInt64>();
if (!argument_types[0]->equals(*trace_type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"First argument (trace) for function {} must be Array(UInt64), but it has type {}",
name, argument_types[0]->getName());
if (argument_types.size() >= 2 && !argument_types[1]->equals(*size_type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument (size) for function {} must be Int64, but it has type {}",
name, argument_types[1]->getName());
if (argument_types.size() >= 3 && !argument_types[2]->equals(*ptr_type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Third argument (ptr) for function {} must be UInt64, but it has type {}",
name, argument_types[2]->getName());
}
AggregateFunctionPtr createAggregateFunctionFlameGraph(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings * settings)
{
if (!settings->allow_introspection_functions)
throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED,
"Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0");
check(name, argument_types, params);
return std::make_shared<AggregateFunctionFlameGraph>(argument_types);
}
void registerAggregateFunctionFlameGraph(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true };
factory.registerFunction("flameGraph", { createAggregateFunctionFlameGraph, properties });
}
}

View File

@ -73,6 +73,7 @@ void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &);
class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -158,6 +159,7 @@ void registerAggregateFunctions()
registerAggregateFunctionExponentialMovingAverage(factory);
registerAggregateFunctionSparkbar(factory);
registerAggregateFunctionAnalysisOfVariance(factory);
registerAggregateFunctionFlameGraph(factory);
registerWindowFunctions(factory);
}

View File

@ -179,15 +179,6 @@ void BackupWriterS3::copyObjectImpl(
}
Aws::S3::Model::HeadObjectOutcome BackupWriterS3::requestObjectHeadData(const std::string & bucket_from, const std::string & key) const
{
Aws::S3::Model::HeadObjectRequest request;
request.SetBucket(bucket_from);
request.SetKey(key);
return client->HeadObject(request);
}
void BackupWriterS3::copyObjectMultipartImpl(
const String & src_bucket,
const String & src_key,
@ -310,7 +301,7 @@ void BackupWriterS3::copyFileNative(DiskPtr from_disk, const String & file_name_
std::string source_bucket = object_storage->getObjectsNamespace();
auto file_path = fs::path(s3_uri.key) / file_name_to;
auto head = requestObjectHeadData(source_bucket, objects[0].absolute_path).GetResult();
auto head = S3::headObject(*client, source_bucket, objects[0].absolute_path).GetResult();
if (static_cast<size_t>(head.GetContentLength()) < request_settings.max_single_operation_copy_size)
{
copyObjectImpl(

View File

@ -62,8 +62,6 @@ public:
void copyFileNative(DiskPtr from_disk, const String & file_name_from, const String & file_name_to) override;
private:
Aws::S3::Model::HeadObjectOutcome requestObjectHeadData(const std::string & bucket_from, const std::string & key) const;
void copyObjectImpl(
const String & src_bucket,
const String & src_key,

View File

@ -106,6 +106,7 @@ if (TARGET ch_contrib::nats_io)
endif()
add_headers_and_sources(dbms Storages/MeiliSearch)
add_headers_and_sources(dbms Storages/NamedCollections)
if (TARGET ch_contrib::amqp_cpp)
add_headers_and_sources(dbms Storages/RabbitMQ)

View File

@ -0,0 +1,16 @@
#pragma once
#include <cstddef>
/// This is a structure which is returned by MemoryTracker.
/// Methods onAlloc/onFree should be called after actual memory allocation if it succeed.
/// For now, it will only collect allocation trace with sample_probability.
struct AllocationTrace
{
AllocationTrace() = default;
explicit AllocationTrace(double sample_probability_);
void onAlloc(void * ptr, size_t size) const;
void onFree(void * ptr, size_t size) const;
double sample_probability = 0;
};

View File

@ -92,8 +92,10 @@ public:
void * alloc(size_t size, size_t alignment = 0)
{
checkSize(size);
CurrentMemoryTracker::alloc(size);
return allocNoTrack(size, alignment);
auto trace = CurrentMemoryTracker::alloc(size);
void * ptr = allocNoTrack(size, alignment);
trace.onAlloc(ptr, size);
return ptr;
}
/// Free memory range.
@ -103,7 +105,8 @@ public:
{
checkSize(size);
freeNoTrack(buf, size);
CurrentMemoryTracker::free(size);
auto trace = CurrentMemoryTracker::free(size);
trace.onFree(buf, size);
}
catch (...)
{
@ -129,13 +132,16 @@ public:
&& alignment <= MALLOC_MIN_ALIGNMENT)
{
/// Resize malloc'd memory region with no special alignment requirement.
CurrentMemoryTracker::realloc(old_size, new_size);
auto trace = CurrentMemoryTracker::realloc(old_size, new_size);
trace.onFree(buf, old_size);
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
buf = new_buf;
trace.onAlloc(buf, new_size);
if constexpr (clear_memory)
if (new_size > old_size)
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
@ -143,7 +149,8 @@ public:
else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
{
/// Resize mmap'd memory region.
CurrentMemoryTracker::realloc(old_size, new_size);
auto trace = CurrentMemoryTracker::realloc(old_size, new_size);
trace.onFree(buf, old_size);
// On apple and freebsd self-implemented mremap used (common/mremap.h)
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
@ -152,14 +159,17 @@ public:
DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP);
trace.onAlloc(buf, new_size);
/// No need for zero-fill, because mmap guarantees it.
}
else if (new_size < MMAP_THRESHOLD)
{
/// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once.
CurrentMemoryTracker::realloc(old_size, new_size);
auto trace = CurrentMemoryTracker::realloc(old_size, new_size);
trace.onFree(buf, old_size);
void * new_buf = allocNoTrack(new_size, alignment);
trace.onAlloc(new_buf, new_size);
memcpy(new_buf, buf, std::min(old_size, new_size));
freeNoTrack(buf, old_size);
buf = new_buf;

View File

@ -30,21 +30,24 @@ struct AllocatorWithMemoryTracking
throw std::bad_alloc();
size_t bytes = n * sizeof(T);
CurrentMemoryTracker::alloc(bytes);
auto trace = CurrentMemoryTracker::alloc(bytes);
T * p = static_cast<T *>(malloc(bytes));
if (!p)
throw std::bad_alloc();
trace.onAlloc(p, bytes);
return p;
}
void deallocate(T * p, size_t n) noexcept
{
free(p);
size_t bytes = n * sizeof(T);
CurrentMemoryTracker::free(bytes);
free(p);
auto trace = CurrentMemoryTracker::free(bytes);
trace.onFree(p, bytes);
}
};

View File

@ -37,7 +37,7 @@ MemoryTracker * getMemoryTracker()
using DB::current_thread;
void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
{
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
if (unlikely(memory_tracker_always_throw_logical_error_on_allocation))
@ -55,8 +55,9 @@ void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
if (will_be > current_thread->untracked_memory_limit)
{
memory_tracker->allocImpl(will_be, throw_if_memory_exceeded);
auto res = memory_tracker->allocImpl(will_be, throw_if_memory_exceeded);
current_thread->untracked_memory = 0;
return res;
}
else
{
@ -68,36 +69,40 @@ void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
/// total_memory_tracker only, ignore untracked_memory
else
{
memory_tracker->allocImpl(size, throw_if_memory_exceeded);
return memory_tracker->allocImpl(size, throw_if_memory_exceeded);
}
return AllocationTrace(memory_tracker->getSampleProbability());
}
return AllocationTrace(0);
}
void CurrentMemoryTracker::check()
{
if (auto * memory_tracker = getMemoryTracker())
memory_tracker->allocImpl(0, true);
std::ignore = memory_tracker->allocImpl(0, true);
}
void CurrentMemoryTracker::alloc(Int64 size)
AllocationTrace CurrentMemoryTracker::alloc(Int64 size)
{
bool throw_if_memory_exceeded = true;
allocImpl(size, throw_if_memory_exceeded);
return allocImpl(size, throw_if_memory_exceeded);
}
void CurrentMemoryTracker::allocNoThrow(Int64 size)
AllocationTrace CurrentMemoryTracker::allocNoThrow(Int64 size)
{
bool throw_if_memory_exceeded = false;
allocImpl(size, throw_if_memory_exceeded);
return allocImpl(size, throw_if_memory_exceeded);
}
void CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size)
AllocationTrace CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size)
{
Int64 addition = new_size - old_size;
addition > 0 ? alloc(addition) : free(-addition);
return addition > 0 ? alloc(addition) : free(-addition);
}
void CurrentMemoryTracker::free(Int64 size)
AllocationTrace CurrentMemoryTracker::free(Int64 size)
{
if (auto * memory_tracker = getMemoryTracker())
{
@ -106,15 +111,20 @@ void CurrentMemoryTracker::free(Int64 size)
current_thread->untracked_memory -= size;
if (current_thread->untracked_memory < -current_thread->untracked_memory_limit)
{
memory_tracker->free(-current_thread->untracked_memory);
Int64 untracked_memory = current_thread->untracked_memory;
current_thread->untracked_memory = 0;
return memory_tracker->free(-untracked_memory);
}
}
/// total_memory_tracker only, ignore untracked_memory
else
{
memory_tracker->free(size);
return memory_tracker->free(size);
}
return AllocationTrace(memory_tracker->getSampleProbability());
}
return AllocationTrace(0);
}

View File

@ -1,19 +1,20 @@
#pragma once
#include <base/types.h>
#include <Common/AllocationTrace.h>
/// Convenience methods, that use current thread's memory_tracker if it is available.
struct CurrentMemoryTracker
{
/// Call the following functions before calling of corresponding operations with memory allocators.
static void alloc(Int64 size);
static void allocNoThrow(Int64 size);
static void realloc(Int64 old_size, Int64 new_size);
[[nodiscard]] static AllocationTrace alloc(Int64 size);
[[nodiscard]] static AllocationTrace allocNoThrow(Int64 size);
[[nodiscard]] static AllocationTrace realloc(Int64 old_size, Int64 new_size);
/// This function should be called after memory deallocation.
static void free(Int64 size);
[[nodiscard]] static AllocationTrace free(Int64 size);
static void check();
private:
static void allocImpl(Int64 size, bool throw_if_memory_exceeded);
[[nodiscard]] static AllocationTrace allocImpl(Int64 size, bool throw_if_memory_exceeded);
};

View File

@ -103,6 +103,7 @@
M(S3Requests, "S3 requests") \
M(KeeperAliveConnections, "Number of alive connections") \
M(KeeperOutstandingRequets, "Number of outstanding requests") \
M(ThreadsInOvercommitTracker, "Number of waiting threads inside of OvercommitTracker") \
namespace CurrentMetrics
{

View File

@ -57,7 +57,8 @@ public:
}
/// Do not count guard page in memory usage.
CurrentMemoryTracker::alloc(num_pages * page_size);
auto trace = CurrentMemoryTracker::alloc(num_pages * page_size);
trace.onAlloc(vp, num_pages * page_size);
boost::context::stack_context sctx;
sctx.size = num_bytes;
@ -77,6 +78,7 @@ public:
::munmap(vp, sctx.size);
/// Do not count guard page in memory usage.
CurrentMemoryTracker::free(sctx.size - page_size);
auto trace = CurrentMemoryTracker::free(sctx.size - page_size);
trace.onFree(vp, sctx.size - page_size);
}
};

View File

@ -1,6 +1,7 @@
#include "MemoryTracker.h"
#include <IO/WriteHelpers.h>
#include <Common/SipHash.h>
#include <Common/VariableContext.h>
#include <Common/TraceSender.h>
#include <Common/Exception.h>
@ -82,6 +83,53 @@ inline std::string_view toDescription(OvercommitResult result)
}
}
bool shouldTrackAllocation(DB::Float64 probability, void * ptr)
{
return sipHash64(uintptr_t(ptr)) < std::numeric_limits<uint64_t>::max() * probability;
}
AllocationTrace updateAllocationTrace(AllocationTrace trace, const std::optional<double> & sample_probability)
{
if (unlikely(sample_probability))
return AllocationTrace(*sample_probability);
return trace;
}
AllocationTrace getAllocationTrace(std::optional<double> & sample_probability)
{
if (unlikely(sample_probability))
return AllocationTrace(*sample_probability);
return AllocationTrace(0);
}
}
AllocationTrace::AllocationTrace(double sample_probability_) : sample_probability(sample_probability_) {}
void AllocationTrace::onAlloc(void * ptr, size_t size) const
{
if (likely(sample_probability == 0))
return;
if (sample_probability < 1 && !shouldTrackAllocation(sample_probability, ptr))
return;
MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = Int64(size), .ptr = ptr});
}
void AllocationTrace::onFree(void * ptr, size_t size) const
{
if (likely(sample_probability == 0))
return;
if (sample_probability < 1 && !shouldTrackAllocation(sample_probability, ptr))
return;
MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -Int64(size), .ptr = ptr});
}
namespace ProfileEvents
@ -135,7 +183,7 @@ void MemoryTracker::logMemoryUsage(Int64 current) const
}
void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker)
AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker)
{
if (size < 0)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Negative size ({}) is passed to MemoryTracker. It is a bug.", size);
@ -154,9 +202,14 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
/// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent.
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
loaded_next->allocImpl(size, throw_if_memory_exceeded,
level == VariableContext::Process ? this : query_tracker);
return;
{
MemoryTracker * tracker = level == VariableContext::Process ? this : query_tracker;
return updateAllocationTrace(
loaded_next->allocImpl(size, throw_if_memory_exceeded, tracker),
sample_probability);
}
return getAllocationTrace(sample_probability);
}
/** Using memory_order_relaxed means that if allocations are done simultaneously,
@ -183,14 +236,6 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
allocation_traced = true;
}
std::bernoulli_distribution sample(sample_probability);
if (unlikely(sample_probability > 0.0 && sample(thread_local_rng)))
{
MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = size});
allocation_traced = true;
}
std::bernoulli_distribution fault(fault_probability);
if (unlikely(fault_probability > 0.0 && fault(thread_local_rng)))
{
@ -309,16 +354,22 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
}
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
loaded_next->allocImpl(size, throw_if_memory_exceeded,
level == VariableContext::Process ? this : query_tracker);
{
MemoryTracker * tracker = level == VariableContext::Process ? this : query_tracker;
return updateAllocationTrace(
loaded_next->allocImpl(size, throw_if_memory_exceeded, tracker),
sample_probability);
}
return getAllocationTrace(sample_probability);
}
void MemoryTracker::adjustWithUntrackedMemory(Int64 untracked_memory)
{
if (untracked_memory > 0)
allocImpl(untracked_memory, /*throw_if_memory_exceeded*/ false);
std::ignore = allocImpl(untracked_memory, /*throw_if_memory_exceeded*/ false);
else
free(-untracked_memory);
std::ignore = free(-untracked_memory);
}
bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage)
@ -337,8 +388,7 @@ bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage)
return false;
}
void MemoryTracker::free(Int64 size)
AllocationTrace MemoryTracker::free(Int64 size)
{
if (MemoryTrackerBlockerInThread::isBlocked(level))
{
@ -353,15 +403,9 @@ void MemoryTracker::free(Int64 size)
/// Since the MemoryTrackerBlockerInThread should respect the level, we should go to the next parent.
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
loaded_next->free(size);
return;
}
return updateAllocationTrace(loaded_next->free(size), sample_probability);
std::bernoulli_distribution sample(sample_probability);
if (unlikely(sample_probability > 0.0 && sample(thread_local_rng)))
{
MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global);
DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size});
return getAllocationTrace(sample_probability);
}
Int64 accounted_size = size;
@ -389,12 +433,15 @@ void MemoryTracker::free(Int64 size)
if (auto * overcommit_tracker_ptr = overcommit_tracker.load(std::memory_order_relaxed))
overcommit_tracker_ptr->tryContinueQueryExecutionAfterFree(accounted_size);
AllocationTrace res = getAllocationTrace(sample_probability);
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
loaded_next->free(size);
res = updateAllocationTrace(loaded_next->free(size), sample_probability);
auto metric_loaded = metric.load(std::memory_order_relaxed);
if (metric_loaded != CurrentMetrics::end())
CurrentMetrics::sub(metric_loaded, accounted_size);
return res;
}
@ -478,3 +525,14 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value)
while ((value == 0 || old_value < value) && !profiler_limit.compare_exchange_weak(old_value, value))
;
}
double MemoryTracker::getSampleProbability()
{
if (sample_probability)
return *sample_probability;
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
return loaded_next->getSampleProbability();
return 0;
}

View File

@ -2,9 +2,11 @@
#include <atomic>
#include <chrono>
#include <optional>
#include <base/types.h>
#include <Common/CurrentMetrics.h>
#include <Common/VariableContext.h>
#include <Common/AllocationTrace.h>
#if !defined(NDEBUG)
#define MEMORY_TRACKER_DEBUG_CHECKS
@ -65,7 +67,7 @@ private:
double fault_probability = 0;
/// To randomly sample allocations and deallocations in trace_log.
double sample_probability = 0;
std::optional<double> sample_probability;
/// Singly-linked list. All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy).
/// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker.
@ -90,8 +92,8 @@ private:
/// allocImpl(...) and free(...) should not be used directly
friend struct CurrentMemoryTracker;
void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr);
void free(Int64 size);
[[nodiscard]] AllocationTrace allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr);
[[nodiscard]] AllocationTrace free(Int64 size);
public:
static constexpr auto USAGE_EVENT_NAME = "MemoryTrackerUsage";
@ -146,6 +148,8 @@ public:
sample_probability = value;
}
double getSampleProbability();
void setProfilerStep(Int64 value)
{
profiler_step = value;

View File

@ -28,4 +28,5 @@ public:
}
friend class MemoryTracker;
friend struct AllocationTrace;
};

View File

@ -3,8 +3,13 @@
#include <chrono>
#include <mutex>
#include <Common/ProfileEvents.h>
#include <Common/CurrentMetrics.h>
#include <Interpreters/ProcessList.h>
namespace CurrentMetrics
{
extern const Metric ThreadsInOvercommitTracker;
}
namespace ProfileEvents
{
@ -32,6 +37,8 @@ OvercommitResult OvercommitTracker::needToStopQuery(MemoryTracker * tracker, Int
if (OvercommitTrackerBlockerInThread::isBlocked())
return OvercommitResult::NONE;
CurrentMetrics::Increment metric_increment(CurrentMetrics::ThreadsInOvercommitTracker);
// NOTE: Do not change the order of locks
//
// global mutex must be acquired before overcommit_m, because

View File

@ -123,16 +123,13 @@ void ProgressIndication::writeFinalProgress()
if (progress.read_rows < 1000)
return;
UInt64 processed_rows = progress.read_rows + progress.written_rows;
UInt64 processed_bytes = progress.read_bytes + progress.written_bytes;
std::cout << "Processed " << formatReadableQuantity(processed_rows) << " rows, "
<< formatReadableSizeWithDecimalSuffix(processed_bytes);
std::cout << "Processed " << formatReadableQuantity(progress.read_rows) << " rows, "
<< formatReadableSizeWithDecimalSuffix(progress.read_bytes);
UInt64 elapsed_ns = getElapsedNanoseconds();
if (elapsed_ns)
std::cout << " (" << formatReadableQuantity(processed_rows * 1000000000.0 / elapsed_ns) << " rows/s., "
<< formatReadableSizeWithDecimalSuffix(processed_bytes * 1000000000.0 / elapsed_ns) << "/s.)";
std::cout << " (" << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., "
<< formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)";
else
std::cout << ". ";
}
@ -167,18 +164,16 @@ void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)
size_t prefix_size = message.count();
UInt64 processed_rows = progress.read_rows + progress.written_rows;
UInt64 processed_bytes = progress.read_bytes + progress.written_bytes;
message << indicator << " Progress: ";
message
<< formatReadableQuantity(processed_rows) << " rows, "
<< formatReadableSizeWithDecimalSuffix(processed_bytes);
<< formatReadableQuantity(progress.read_rows) << " rows, "
<< formatReadableSizeWithDecimalSuffix(progress.read_bytes);
UInt64 elapsed_ns = getElapsedNanoseconds();
if (elapsed_ns)
message << " ("
<< formatReadableQuantity(processed_rows * 1000000000.0 / elapsed_ns) << " rows/s., "
<< formatReadableSizeWithDecimalSuffix(processed_bytes * 1000000000.0 / elapsed_ns) << "/s.) ";
<< formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., "
<< formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.) ";
else
message << ". ";

View File

@ -33,6 +33,7 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext
+ sizeof(TraceType) /// trace type
+ sizeof(UInt64) /// thread_id
+ sizeof(Int64) /// size
+ sizeof(void *) /// ptr
+ sizeof(ProfileEvents::Event) /// event
+ sizeof(ProfileEvents::Count); /// increment
@ -74,6 +75,7 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext
writePODBinary(trace_type, out);
writePODBinary(thread_id, out);
writePODBinary(extras.size, out);
writePODBinary(UInt64(extras.ptr), out);
writePODBinary(extras.event, out);
writePODBinary(extras.increment, out);

View File

@ -28,8 +28,9 @@ class TraceSender
public:
struct Extras
{
/// size - for memory tracing is the amount of memory allocated; for other trace types it is 0.
/// size, ptr - for memory tracing is the amount of memory allocated; for other trace types it is 0.
Int64 size{};
void * ptr = nullptr;
/// Event type and increment for 'ProfileEvent' trace type; for other trace types defaults.
ProfileEvents::Event event{ProfileEvents::end()};
ProfileEvents::Count increment{};

View File

@ -9,7 +9,11 @@ extern "C" void * clickhouse_malloc(size_t size)
{
void * res = malloc(size);
if (res)
Memory::trackMemory(size);
{
AllocationTrace trace;
size_t actual_size = Memory::trackMemory(size, trace);
trace.onAlloc(res, actual_size);
}
return res;
}
@ -17,17 +21,29 @@ extern "C" void * clickhouse_calloc(size_t number_of_members, size_t size)
{
void * res = calloc(number_of_members, size);
if (res)
Memory::trackMemory(number_of_members * size);
{
AllocationTrace trace;
size_t actual_size = Memory::trackMemory(number_of_members * size, trace);
trace.onAlloc(res, actual_size);
}
return res;
}
extern "C" void * clickhouse_realloc(void * ptr, size_t size)
{
if (ptr)
Memory::untrackMemory(ptr);
{
AllocationTrace trace;
size_t actual_size = Memory::untrackMemory(ptr, trace);
trace.onFree(ptr, actual_size);
}
void * res = realloc(ptr, size);
if (res)
Memory::trackMemory(size);
{
AllocationTrace trace;
size_t actual_size = Memory::trackMemory(size, trace);
trace.onAlloc(res, actual_size);
}
return res;
}
@ -42,7 +58,9 @@ extern "C" void * clickhouse_reallocarray(void * ptr, size_t number_of_members,
extern "C" void clickhouse_free(void * ptr)
{
Memory::untrackMemory(ptr);
AllocationTrace trace;
size_t actual_size = Memory::untrackMemory(ptr, trace);
trace.onFree(ptr, actual_size);
free(ptr);
}
@ -50,6 +68,10 @@ extern "C" int clickhouse_posix_memalign(void ** memptr, size_t alignment, size_
{
int res = posix_memalign(memptr, alignment, size);
if (res == 0)
Memory::trackMemory(size);
{
AllocationTrace trace;
size_t actual_size = Memory::trackMemory(size, trace);
trace.onAlloc(*memptr, actual_size);
}
return res;
}

View File

@ -112,16 +112,19 @@ inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size, TAlign... align
template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void trackMemory(std::size_t size, TAlign... align)
inline ALWAYS_INLINE size_t trackMemory(std::size_t size, AllocationTrace & trace, TAlign... align)
{
std::size_t actual_size = getActualAllocationSize(size, align...);
CurrentMemoryTracker::allocNoThrow(actual_size);
trace = CurrentMemoryTracker::allocNoThrow(actual_size);
return actual_size;
}
template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0, TAlign... align [[maybe_unused]]) noexcept
inline ALWAYS_INLINE size_t untrackMemory(void * ptr [[maybe_unused]], AllocationTrace & trace, std::size_t size [[maybe_unused]] = 0, TAlign... align [[maybe_unused]]) noexcept
{
std::size_t actual_size = 0;
try
{
#if USE_JEMALLOC
@ -130,23 +133,26 @@ inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t
if (likely(ptr != nullptr))
{
if constexpr (sizeof...(TAlign) == 1)
CurrentMemoryTracker::free(sallocx(ptr, MALLOCX_ALIGN(alignToSizeT(align...))));
actual_size = sallocx(ptr, MALLOCX_ALIGN(alignToSizeT(align...)));
else
CurrentMemoryTracker::free(sallocx(ptr, 0));
actual_size = sallocx(ptr, 0);
}
#else
if (size)
CurrentMemoryTracker::free(size);
actual_size = size;
# if defined(_GNU_SOURCE)
/// It's innaccurate resource free for sanitizers. malloc_usable_size() result is greater or equal to allocated size.
else
CurrentMemoryTracker::free(malloc_usable_size(ptr));
actual_size = malloc_usable_size(ptr);
# endif
#endif
trace = CurrentMemoryTracker::free(actual_size);
}
catch (...)
{
}
return actual_size;
}
}

View File

@ -50,50 +50,74 @@ static struct InitializeJemallocZoneAllocatorForOSX
void * operator new(std::size_t size)
{
Memory::trackMemory(size);
return Memory::newImpl(size);
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace);
void * ptr = Memory::newImpl(size);
trace.onAlloc(ptr, actual_size);
return ptr;
}
void * operator new(std::size_t size, std::align_val_t align)
{
Memory::trackMemory(size, align);
return Memory::newImpl(size, align);
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace, align);
void * ptr = Memory::newImpl(size, align);
trace.onAlloc(ptr, actual_size);
return ptr;
}
void * operator new[](std::size_t size)
{
Memory::trackMemory(size);
return Memory::newImpl(size);
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace);
void * ptr = Memory::newImpl(size);
trace.onAlloc(ptr, actual_size);
return ptr;
}
void * operator new[](std::size_t size, std::align_val_t align)
{
Memory::trackMemory(size, align);
return Memory::newImpl(size, align);
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace, align);
void * ptr = Memory::newImpl(size, align);
trace.onAlloc(ptr, actual_size);
return ptr;
}
void * operator new(std::size_t size, const std::nothrow_t &) noexcept
{
Memory::trackMemory(size);
return Memory::newNoExept(size);
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace);
void * ptr = Memory::newNoExept(size);
trace.onAlloc(ptr, actual_size);
return ptr;
}
void * operator new[](std::size_t size, const std::nothrow_t &) noexcept
{
Memory::trackMemory(size);
return Memory::newNoExept(size);
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace);
void * ptr = Memory::newNoExept(size);
trace.onAlloc(ptr, actual_size);
return ptr;
}
void * operator new(std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept
{
Memory::trackMemory(size, align);
return Memory::newNoExept(size, align);
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace, align);
void * ptr = Memory::newNoExept(size, align);
trace.onAlloc(ptr, actual_size);
return ptr;
}
void * operator new[](std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept
{
Memory::trackMemory(size, align);
return Memory::newNoExept(size, align);
AllocationTrace trace;
std::size_t actual_size = Memory::trackMemory(size, trace, align);
void * ptr = Memory::newNoExept(size, align);
trace.onAlloc(ptr, actual_size);
return ptr;
}
/// delete
@ -109,48 +133,64 @@ void * operator new[](std::size_t size, std::align_val_t align, const std::nothr
void operator delete(void * ptr) noexcept
{
Memory::untrackMemory(ptr);
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace);
trace.onFree(ptr, actual_size);
Memory::deleteImpl(ptr);
}
void operator delete(void * ptr, std::align_val_t align) noexcept
{
Memory::untrackMemory(ptr, 0, align);
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace, 0, align);
trace.onFree(ptr, actual_size);
Memory::deleteImpl(ptr);
}
void operator delete[](void * ptr) noexcept
{
Memory::untrackMemory(ptr);
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace);
trace.onFree(ptr, actual_size);
Memory::deleteImpl(ptr);
}
void operator delete[](void * ptr, std::align_val_t align) noexcept
{
Memory::untrackMemory(ptr, 0, align);
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace, 0, align);
trace.onFree(ptr, actual_size);
Memory::deleteImpl(ptr);
}
void operator delete(void * ptr, std::size_t size) noexcept
{
Memory::untrackMemory(ptr, size);
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace, size);
trace.onFree(ptr, actual_size);
Memory::deleteSized(ptr, size);
}
void operator delete(void * ptr, std::size_t size, std::align_val_t align) noexcept
{
Memory::untrackMemory(ptr, size, align);
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace, size, align);
trace.onFree(ptr, actual_size);
Memory::deleteSized(ptr, size, align);
}
void operator delete[](void * ptr, std::size_t size) noexcept
{
Memory::untrackMemory(ptr, size);
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace, size);
trace.onFree(ptr, actual_size);
Memory::deleteSized(ptr, size);
}
void operator delete[](void * ptr, std::size_t size, std::align_val_t align) noexcept
{
Memory::untrackMemory(ptr, size, align);
AllocationTrace trace;
std::size_t actual_size = Memory::untrackMemory(ptr, trace, size, align);
trace.onFree(ptr, actual_size);
Memory::deleteSized(ptr, size, align);
}

View File

@ -149,31 +149,13 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa
};
};
const auto file_exists = [&](const auto & key)
{
Aws::S3::Model::HeadObjectRequest request;
request.SetBucket(s3_client->uri.bucket);
request.SetKey(key);
auto outcome = s3_client->client->HeadObject(request);
if (outcome.IsSuccess())
return true;
const auto & error = outcome.GetError();
if (error.GetErrorType() != Aws::S3::S3Errors::NO_SUCH_KEY && error.GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND)
throw S3Exception(error.GetErrorType(), "Failed to verify existence of lock file: {}", error.GetMessage());
return false;
};
LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_path);
ReadBufferFromFile snapshot_file(snapshot_path);
auto snapshot_name = fs::path(snapshot_path).filename().string();
auto lock_file = fmt::format(".{}_LOCK", snapshot_name);
if (file_exists(snapshot_name))
if (S3::objectExists(*s3_client->client, s3_client->uri.bucket, snapshot_name))
{
LOG_ERROR(log, "Snapshot {} already exists", snapshot_name);
return;
@ -181,7 +163,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa
// First we need to verify that there isn't already a lock file for the snapshot we want to upload
// Only leader uploads a snapshot, but there can be a rare case where we have 2 leaders in NuRaft
if (file_exists(lock_file))
if (S3::objectExists(*s3_client->client, s3_client->uri.bucket, lock_file))
{
LOG_ERROR(log, "Lock file for {} already, exists. Probably a different node is already uploading the snapshot", snapshot_name);
return;

View File

@ -767,7 +767,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, input_format_json_validate_types_from_metadata, true, "For JSON/JSONCompact/JSONColumnsWithMetadata input formats this controls whether format parser should check if data types from input metadata match data types of the corresponding columns from the table", 0) \
M(Bool, input_format_json_read_numbers_as_strings, false, "Allow to parse numbers as strings in JSON input formats", 0) \
M(Bool, input_format_json_read_objects_as_strings, false, "Allow to parse JSON objects as strings in JSON input formats", 0) \
M(Bool, input_format_try_infer_integers, true, "Try to infer numbers from string fields while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \
M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \

View File

@ -525,7 +525,7 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB
}
/// Check if we have enough data in buffer to check if it's a null.
if (istr.available() > null_representation.size())
if (settings.csv.custom_delimiter.empty() && istr.available() > null_representation.size())
{
auto check_for_null = [&istr, &null_representation, &settings]()
{
@ -550,8 +550,21 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB
{
buf.setCheckpoint();
SCOPE_EXIT(buf.dropCheckpoint());
if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\r' || *buf.position() == '\n'))
return true;
if (checkString(null_representation, buf))
{
if (!settings.csv.custom_delimiter.empty())
{
if (checkString(settings.csv.custom_delimiter, buf))
{
/// Rollback to the beginning of custom delimiter.
buf.rollbackToCheckpoint();
assertString(null_representation, buf);
return true;
}
}
else if (buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\r' || *buf.position() == '\n')
return true;
}
buf.rollbackToCheckpoint();
return false;

View File

@ -66,12 +66,6 @@ namespace ErrorCodes
namespace
{
bool isNotFoundError(Aws::S3::S3Errors error)
{
return error == Aws::S3::S3Errors::RESOURCE_NOT_FOUND
|| error == Aws::S3::S3Errors::NO_SUCH_KEY;
}
template <typename Result, typename Error>
void throwIfError(const Aws::Utils::Outcome<Result, Error> & response)
{
@ -89,7 +83,7 @@ void throwIfUnexpectedError(const Aws::Utils::Outcome<Result, Error> & response,
/// the log will be polluted with error messages from aws sdk.
/// Looks like there is no way to suppress them.
if (!response.IsSuccess() && (!if_exists || !isNotFoundError(response.GetError().GetErrorType())))
if (!response.IsSuccess() && (!if_exists || !S3::isNotFoundError(response.GetError().GetErrorType())))
{
const auto & err = response.GetError();
throw S3Exception(err.GetErrorType(), "{} (Code: {})", err.GetMessage(), static_cast<size_t>(err.GetErrorType()));
@ -130,28 +124,12 @@ std::string S3ObjectStorage::generateBlobNameForPath(const std::string & /* path
Aws::S3::Model::HeadObjectOutcome S3ObjectStorage::requestObjectHeadData(const std::string & bucket_from, const std::string & key) const
{
auto client_ptr = client.get();
ProfileEvents::increment(ProfileEvents::S3HeadObject);
ProfileEvents::increment(ProfileEvents::DiskS3HeadObject);
Aws::S3::Model::HeadObjectRequest request;
request.SetBucket(bucket_from);
request.SetKey(key);
return client_ptr->HeadObject(request);
return S3::headObject(*client.get(), bucket_from, key, "", true);
}
bool S3ObjectStorage::exists(const StoredObject & object) const
{
auto object_head = requestObjectHeadData(bucket, object.absolute_path);
if (!object_head.IsSuccess())
{
if (object_head.GetError().GetErrorType() == Aws::S3::S3Errors::RESOURCE_NOT_FOUND)
return false;
throwIfError(object_head);
}
return true;
return S3::objectExists(*client.get(), bucket, object.absolute_path, "", true);
}
std::unique_ptr<ReadBufferFromFileBase> S3ObjectStorage::readObjects( /// NOLINT

View File

@ -119,6 +119,7 @@ struct FormatSettings
char tuple_delimiter = ',';
bool use_best_effort_in_schema_inference = true;
UInt64 skip_first_lines = 0;
String custom_delimiter;
} csv;
struct HiveText

View File

@ -16,6 +16,7 @@
#include <Functions/FunctionUnaryArithmetic.h>
#include <Common/FieldVisitors.h>
#include <cstring>
#include <algorithm>
@ -94,7 +95,7 @@ void convertAnyColumnToBool(const IColumn * column, UInt8Container & res)
}
template <class Op, typename Func>
template <class Op, bool IsTernary, typename Func>
bool extractConstColumns(ColumnRawPtrs & in, UInt8 & res, Func && func)
{
bool has_res = false;
@ -112,7 +113,10 @@ bool extractConstColumns(ColumnRawPtrs & in, UInt8 & res, Func && func)
if (has_res)
{
res = Op::apply(res, x);
if constexpr (IsTernary)
res = Op::ternaryApply(res, x);
else
res = Op::apply(res, x);
}
else
{
@ -129,7 +133,7 @@ bool extractConstColumns(ColumnRawPtrs & in, UInt8 & res, Func && func)
template <class Op>
inline bool extractConstColumnsAsBool(ColumnRawPtrs & in, UInt8 & res)
{
return extractConstColumns<Op>(
return extractConstColumns<Op, false>(
in, res,
[](const Field & value)
{
@ -141,7 +145,7 @@ inline bool extractConstColumnsAsBool(ColumnRawPtrs & in, UInt8 & res)
template <class Op>
inline bool extractConstColumnsAsTernary(ColumnRawPtrs & in, UInt8 & res_3v)
{
return extractConstColumns<Op>(
return extractConstColumns<Op, true>(
in, res_3v,
[](const Field & value)
{
@ -192,47 +196,74 @@ private:
};
/// A helper class used by AssociativeGenericApplierImpl
/// Allows for on-the-fly conversion of any data type into intermediate ternary representation
using TernaryValueGetter = std::function<Ternary::ResultType (size_t)>;
template <typename ... Types>
struct ValueGetterBuilderImpl;
struct TernaryValueBuilderImpl;
template <typename Type, typename ...Types>
struct ValueGetterBuilderImpl<Type, Types...>
struct TernaryValueBuilderImpl<Type, Types...>
{
static TernaryValueGetter build(const IColumn * x)
static void build(const IColumn * x, UInt8* __restrict ternary_column_data)
{
size_t size = x->size();
if (x->onlyNull())
{
return [](size_t){ return Ternary::Null; };
memset(ternary_column_data, Ternary::Null, size);
}
else if (const auto * nullable_column = typeid_cast<const ColumnNullable *>(x))
{
if (const auto * nested_column = typeid_cast<const ColumnVector<Type> *>(nullable_column->getNestedColumnPtr().get()))
{
return [
&null_data = nullable_column->getNullMapData(),
&column_data = nested_column->getData()](size_t i)
const auto& null_data = nullable_column->getNullMapData();
const auto& column_data = nested_column->getData();
if constexpr (sizeof(Type) == 1)
{
return Ternary::makeValue(column_data[i], null_data[i]);
};
for (size_t i = 0; i < size; ++i)
{
auto has_value = static_cast<UInt8>(column_data[i] != 0);
auto is_null = !!null_data[i];
ternary_column_data[i] = ((has_value << 1) | is_null) & (1 << !is_null);
}
}
else
{
for (size_t i = 0; i < size; ++i)
{
auto has_value = static_cast<UInt8>(column_data[i] != 0);
ternary_column_data[i] = has_value;
}
for (size_t i = 0; i < size; ++i)
{
auto has_value = ternary_column_data[i];
auto is_null = !!null_data[i];
ternary_column_data[i] = ((has_value << 1) | is_null) & (1 << !is_null);
}
}
}
else
return ValueGetterBuilderImpl<Types...>::build(x);
TernaryValueBuilderImpl<Types...>::build(x, ternary_column_data);
}
else if (const auto column = typeid_cast<const ColumnVector<Type> *>(x))
return [&column_data = column->getData()](size_t i) { return Ternary::makeValue(column_data[i]); };
{
auto &column_data = column->getData();
for (size_t i = 0; i < size; ++i)
{
ternary_column_data[i] = (column_data[i] != 0) << 1;
}
}
else
return ValueGetterBuilderImpl<Types...>::build(x);
TernaryValueBuilderImpl<Types...>::build(x, ternary_column_data);
}
};
template <>
struct ValueGetterBuilderImpl<>
struct TernaryValueBuilderImpl<>
{
static TernaryValueGetter build(const IColumn * x)
[[noreturn]] static void build(const IColumn * x, UInt8 * /* nullable_ternary_column_data */)
{
throw Exception(
std::string("Unknown numeric column of type: ") + demangle(typeid(*x).name()),
@ -240,12 +271,12 @@ struct ValueGetterBuilderImpl<>
}
};
using ValueGetterBuilder =
ValueGetterBuilderImpl<UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64>;
using TernaryValueBuilder =
TernaryValueBuilderImpl<UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64>;
/// This class together with helper class ValueGetterBuilder can be used with columns of arbitrary data type
/// Allows for on-the-fly conversion of any type of data into intermediate ternary representation
/// and eliminates the need to materialize data columns in intermediate representation
/// This class together with helper class TernaryValueBuilder can be used with columns of arbitrary data type
/// Converts column of any data type into an intermediate UInt8Column of ternary representation for the
/// vectorized ternary logic evaluation.
template <typename Op, size_t N>
class AssociativeGenericApplierImpl
{
@ -254,20 +285,19 @@ class AssociativeGenericApplierImpl
public:
/// Remembers the last N columns from `in`.
explicit AssociativeGenericApplierImpl(const ColumnRawPtrs & in)
: val_getter{ValueGetterBuilder::build(in[in.size() - N])}, next{in} {}
: vec(in[in.size() - N]->size()), next{in}
{
TernaryValueBuilder::build(in[in.size() - N], vec.data());
}
/// Returns a combination of values in the i-th row of all columns stored in the constructor.
inline ResultValueType apply(const size_t i) const
{
const auto a = val_getter(i);
if constexpr (Op::isSaturable())
return Op::isSaturatedValueTernary(a) ? a : Op::apply(a, next.apply(i));
else
return Op::apply(a, next.apply(i));
return Op::ternaryApply(vec[i], next.apply(i));
}
private:
const TernaryValueGetter val_getter;
UInt8Container vec;
const AssociativeGenericApplierImpl<Op, N - 1> next;
};
@ -280,12 +310,15 @@ class AssociativeGenericApplierImpl<Op, 1>
public:
/// Remembers the last N columns from `in`.
explicit AssociativeGenericApplierImpl(const ColumnRawPtrs & in)
: val_getter{ValueGetterBuilder::build(in[in.size() - 1])} {}
: vec(UInt8Container(in[in.size() - 1]->size()))
{
TernaryValueBuilder::build(in[in.size() - 1], vec.data());
}
inline ResultValueType apply(const size_t i) const { return val_getter(i); }
inline ResultValueType apply(const size_t i) const { return vec[i]; }
private:
const TernaryValueGetter val_getter;
UInt8Container vec;
};
@ -318,7 +351,12 @@ struct OperationApplier
for (size_t i = 0; i < size; ++i)
{
if constexpr (CarryResult)
result_data[i] = Op::apply(result_data[i], operation_applier_impl.apply(i));
{
if constexpr (std::is_same_v<OperationApplierImpl<Op, N>, AssociativeApplierImpl<Op, N>>)
result_data[i] = Op::apply(result_data[i], operation_applier_impl.apply(i));
else
result_data[i] = Op::ternaryApply(result_data[i], operation_applier_impl.apply(i));
}
else
result_data[i] = operation_applier_impl.apply(i);
}

View File

@ -44,21 +44,29 @@ namespace Ternary
{
using ResultType = UInt8;
/** These carefully picked values magically work so bitwise "and", "or" on them
* corresponds to the expected results in three-valued logic.
/** These values are carefully picked so that they could be efficiently evaluated with bitwise operations, which
* are feasible for auto-vectorization by the compiler. The expression for the ternary value evaluation writes:
*
* False and True are represented by all-0 and all-1 bits, so all bitwise operations on them work as expected.
* Null is represented as single 1 bit. So, it is something in between False and True.
* And "or" works like maximum and "and" works like minimum:
* "or" keeps True as is and lifts False with Null to Null.
* "and" keeps False as is and downs True with Null to Null.
* ternary_value = ((value << 1) | is_null) & (1 << !is_null)
*
* The truth table of the above formula lists:
* +---------------+--------------+-------------+
* | is_null\value | 0 | 1 |
* +---------------+--------------+-------------+
* | 0 | 0b00 (False) | 0b10 (True) |
* | 1 | 0b01 (Null) | 0b01 (Null) |
* +---------------+--------------+-------------+
*
* As the numerical values of False, Null and True are assigned in ascending order, the "and" and "or" of
* ternary logic could be implemented with minimum and maximum respectively, which are also vectorizable.
* https://en.wikipedia.org/wiki/Three-valued_logic
*
* This logic does not apply for "not" and "xor" - they work with default implementation for NULLs:
* anything with NULL returns NULL, otherwise use conventional two-valued logic.
*/
static constexpr UInt8 False = 0; /// All zero bits.
static constexpr UInt8 True = -1; /// All one bits.
static constexpr UInt8 Null = 1; /// Single one bit.
static constexpr UInt8 False = 0; /// 0b00
static constexpr UInt8 Null = 1; /// 0b01
static constexpr UInt8 True = 2; /// 0b10
template <typename T>
inline ResultType makeValue(T value)
@ -90,6 +98,8 @@ struct AndImpl
static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a & b; }
static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::min(a, b); }
/// Will use three-valued logic for NULLs (see above) or default implementation (any operation with NULL returns NULL).
static inline constexpr bool specialImplementationForNulls() { return true; }
};
@ -102,6 +112,7 @@ struct OrImpl
static inline constexpr bool isSaturatedValue(bool a) { return a; }
static inline constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::True; }
static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a | b; }
static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::max(a, b); }
static inline constexpr bool specialImplementationForNulls() { return true; }
};
@ -113,6 +124,7 @@ struct XorImpl
static inline constexpr bool isSaturatedValue(bool) { return false; }
static inline constexpr bool isSaturatedValueTernary(UInt8) { return false; }
static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a != b; }
static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return a != b; }
static inline constexpr bool specialImplementationForNulls() { return false; }
#if USE_EMBEDDED_COMPILER

View File

@ -0,0 +1,354 @@
#include <algorithm>
#include <cstring>
#include <vector>
#include <string>
#include <type_traits>
#include <gtest/gtest.h>
#include <Columns/ColumnNothing.h>
#include <Columns/ColumnsNumber.h>
#include <Functions/FunctionsLogical.h>
// I know that inclusion of .cpp is not good at all
#include <Functions/FunctionsLogical.cpp> // NOLINT
using namespace DB;
using TernaryValues = std::vector<Ternary::ResultType>;
struct LinearCongruentialGenerator
{
/// Constants from `man lrand48_r`.
static constexpr UInt64 a = 0x5DEECE66D;
static constexpr UInt64 c = 0xB;
/// And this is from `head -c8 /dev/urandom | xxd -p`
UInt64 current = 0x09826f4a081cee35ULL;
UInt32 next()
{
current = current * a + c;
return static_cast<UInt32>(current >> 16);
}
};
void generateRandomTernaryValue(LinearCongruentialGenerator & gen, Ternary::ResultType * output, size_t size, double false_ratio, double null_ratio)
{
/// The LinearCongruentialGenerator generates nonnegative integers uniformly distributed over the interval [0, 2^32).
/// See https://linux.die.net/man/3/nrand48
double false_percentile = false_ratio;
double null_percentile = false_ratio + null_ratio;
false_percentile = false_percentile > 1 ? 1 : false_percentile;
null_percentile = null_percentile > 1 ? 1 : null_percentile;
UInt32 false_threshold = static_cast<UInt32>(static_cast<double>(std::numeric_limits<UInt32>::max()) * false_percentile);
UInt32 null_threshold = static_cast<UInt32>(static_cast<double>(std::numeric_limits<UInt32>::max()) * null_percentile);
for (Ternary::ResultType * end = output + size; output != end; ++output)
{
UInt32 val = gen.next();
*output = val < false_threshold ? Ternary::False : (val < null_threshold ? Ternary::Null : Ternary::True);
}
}
template<typename T>
ColumnPtr createColumnNullable(const Ternary::ResultType * ternary_values, size_t size)
{
auto nested_column = ColumnVector<T>::create(size);
auto null_map = ColumnUInt8::create(size);
auto & nested_column_data = nested_column->getData();
auto & null_map_data = null_map->getData();
for (size_t i = 0; i < size; ++i)
{
if (ternary_values[i] == Ternary::Null)
{
null_map_data[i] = 1;
nested_column_data[i] = 0;
}
else if (ternary_values[i] == Ternary::True)
{
null_map_data[i] = 0;
nested_column_data[i] = 100;
}
else
{
null_map_data[i] = 0;
nested_column_data[i] = 0;
}
}
return ColumnNullable::create(std::move(nested_column), std::move(null_map));
}
template<typename T>
ColumnPtr createColumnVector(const Ternary::ResultType * ternary_values, size_t size)
{
auto column = ColumnVector<T>::create(size);
auto & column_data = column->getData();
for (size_t i = 0; i < size; ++i)
{
if (ternary_values[i] == Ternary::True)
{
column_data[i] = 100;
}
else
{
column_data[i] = 0;
}
}
return column;
}
template<typename ColumnType, typename T>
ColumnPtr createRandomColumn(LinearCongruentialGenerator & gen, TernaryValues & ternary_values)
{
size_t size = ternary_values.size();
Ternary::ResultType * ternary_data = ternary_values.data();
if constexpr (std::is_same_v<ColumnType, ColumnNullable>)
{
generateRandomTernaryValue(gen, ternary_data, size, 0.3, 0.7);
return createColumnNullable<T>(ternary_data, size);
}
else if constexpr (std::is_same_v<ColumnType, ColumnVector<UInt8>>)
{
generateRandomTernaryValue(gen, ternary_data, size, 0.5, 0);
return createColumnVector<T>(ternary_data, size);
}
else
{
auto nested_col = ColumnNothing::create(size);
auto null_map = ColumnUInt8::create(size);
memset(ternary_data, Ternary::Null, size);
return ColumnNullable::create(std::move(nested_col), std::move(null_map));
}
}
/* The truth table of ternary And and Or operations:
* +-------+-------+---------+--------+
* | a | b | a And b | a Or b |
* +-------+-------+---------+--------+
* | False | False | False | False |
* | False | Null | False | Null |
* | False | True | False | True |
* | Null | False | False | Null |
* | Null | Null | Null | Null |
* | Null | True | Null | True |
* | True | False | False | True |
* | True | Null | Null | True |
* | True | True | True | True |
* +-------+-------+---------+--------+
*
* https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic
*/
template <typename Op, typename T>
bool testTernaryLogicTruthTable()
{
constexpr size_t size = 9;
Ternary::ResultType col_a_ternary[] = {Ternary::False, Ternary::False, Ternary::False, Ternary::Null, Ternary::Null, Ternary::Null, Ternary::True, Ternary::True, Ternary::True};
Ternary::ResultType col_b_ternary[] = {Ternary::False, Ternary::Null, Ternary::True, Ternary::False, Ternary::Null, Ternary::True,Ternary::False, Ternary::Null, Ternary::True};
Ternary::ResultType and_expected_ternary[] = {Ternary::False, Ternary::False, Ternary::False, Ternary::False, Ternary::Null, Ternary::Null,Ternary::False, Ternary::Null, Ternary::True};
Ternary::ResultType or_expected_ternary[] = {Ternary::False, Ternary::Null, Ternary::True, Ternary::Null, Ternary::Null, Ternary::True,Ternary::True, Ternary::True, Ternary::True};
Ternary::ResultType * expected_ternary;
if constexpr (std::is_same_v<Op, AndImpl>)
{
expected_ternary = and_expected_ternary;
}
else
{
expected_ternary = or_expected_ternary;
}
auto col_a = createColumnNullable<T>(col_a_ternary, size);
auto col_b = createColumnNullable<T>(col_b_ternary, size);
ColumnRawPtrs arguments = {col_a.get(), col_b.get()};
auto col_res = ColumnUInt8::create(size);
auto & col_res_data = col_res->getData();
OperationApplier<Op, AssociativeGenericApplierImpl>::apply(arguments, col_res->getData(), false);
for (size_t i = 0; i < size; ++i)
{
if (col_res_data[i] != expected_ternary[i]) return false;
}
return true;
}
template <typename Op, typename LeftColumn, typename RightColumn>
bool testTernaryLogicOfTwoColumns(size_t size)
{
LinearCongruentialGenerator gen;
TernaryValues left_column_ternary(size);
TernaryValues right_column_ternary(size);
TernaryValues expected_ternary(size);
ColumnPtr left = createRandomColumn<LeftColumn, UInt8>(gen, left_column_ternary);
ColumnPtr right = createRandomColumn<RightColumn, UInt8>(gen, right_column_ternary);
for (size_t i = 0; i < size; ++i)
{
/// Given that False is less than Null and Null is less than True, the And operation can be implemented
/// with std::min, and the Or operation can be implemented with std::max.
if constexpr (std::is_same_v<Op, AndImpl>)
{
expected_ternary[i] = std::min(left_column_ternary[i], right_column_ternary[i]);
}
else
{
expected_ternary[i] = std::max(left_column_ternary[i], right_column_ternary[i]);
}
}
ColumnRawPtrs arguments = {left.get(), right.get()};
auto col_res = ColumnUInt8::create(size);
auto & col_res_data = col_res->getData();
OperationApplier<Op, AssociativeGenericApplierImpl>::apply(arguments, col_res->getData(), false);
for (size_t i = 0; i < size; ++i)
{
if (col_res_data[i] != expected_ternary[i]) return false;
}
return true;
}
TEST(TernaryLogicTruthTable, NestedUInt8)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, UInt8>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, UInt8>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedUInt16)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, UInt16>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, UInt16>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedUInt32)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, UInt32>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, UInt32>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedUInt64)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, UInt64>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, UInt64>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedInt8)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, Int8>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, Int8>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedInt16)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, Int16>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, Int16>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedInt32)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, Int32>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, Int32>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedInt64)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, Int64>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, Int64>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedFloat32)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, Float32>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, Float32>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTruthTable, NestedFloat64)
{
bool test_1 = testTernaryLogicTruthTable<AndImpl, Float64>();
bool test_2 = testTernaryLogicTruthTable<OrImpl, Float64>();
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTwoColumns, TwoNullable)
{
bool test_1 = testTernaryLogicOfTwoColumns<AndImpl, ColumnNullable, ColumnNullable>(100 /*size*/);
bool test_2 = testTernaryLogicOfTwoColumns<OrImpl, ColumnNullable, ColumnNullable>(100 /*size*/);
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTwoColumns, TwoVector)
{
bool test_1 = testTernaryLogicOfTwoColumns<AndImpl, ColumnUInt8, ColumnUInt8>(100 /*size*/);
bool test_2 = testTernaryLogicOfTwoColumns<OrImpl, ColumnUInt8, ColumnUInt8>(100 /*size*/);
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTwoColumns, TwoNothing)
{
bool test_1 = testTernaryLogicOfTwoColumns<AndImpl, ColumnNothing, ColumnNothing>(100 /*size*/);
bool test_2 = testTernaryLogicOfTwoColumns<OrImpl, ColumnNothing, ColumnNothing>(100 /*size*/);
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTwoColumns, NullableVector)
{
bool test_1 = testTernaryLogicOfTwoColumns<AndImpl, ColumnNullable, ColumnUInt8>(100 /*size*/);
bool test_2 = testTernaryLogicOfTwoColumns<OrImpl, ColumnNullable, ColumnUInt8>(100 /*size*/);
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTwoColumns, NullableNothing)
{
bool test_1 = testTernaryLogicOfTwoColumns<AndImpl, ColumnNullable, ColumnNothing>(100 /*size*/);
bool test_2 = testTernaryLogicOfTwoColumns<OrImpl, ColumnNullable, ColumnNothing>(100 /*size*/);
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}
TEST(TernaryLogicTwoColumns, VectorNothing)
{
bool test_1 = testTernaryLogicOfTwoColumns<AndImpl, ColumnUInt8, ColumnNothing>(100 /*size*/);
bool test_2 = testTernaryLogicOfTwoColumns<OrImpl, ColumnUInt8, ColumnNothing>(100 /*size*/);
ASSERT_EQ(test_1, true);
ASSERT_EQ(test_2, true);
}

View File

@ -340,9 +340,7 @@ SeekableReadBufferPtr ReadBufferS3Factory::getReader()
{
const auto next_range = range_generator.nextRange();
if (!next_range)
{
return nullptr;
}
auto reader = std::make_shared<ReadBufferFromS3>(
client_ptr,
@ -350,10 +348,11 @@ SeekableReadBufferPtr ReadBufferS3Factory::getReader()
key,
version_id,
request_settings,
read_settings,
read_settings.adjustBufferSize(object_size),
false /*use_external_buffer*/,
next_range->first,
next_range->second);
return reader;
}

View File

@ -35,6 +35,8 @@ namespace ErrorCodes
extern const int CANNOT_PARSE_DATE;
extern const int INCORRECT_DATA;
extern const int ATTEMPT_TO_READ_AFTER_EOF;
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
}
template <typename IteratorSrc, typename IteratorDst>
@ -642,9 +644,10 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV &
const char delimiter = settings.delimiter;
const char maybe_quote = *buf.position();
const String & custom_delimiter = settings.custom_delimiter;
/// Emptiness and not even in quotation marks.
if (maybe_quote == delimiter)
if (custom_delimiter.empty() && maybe_quote == delimiter)
return;
if ((settings.allow_single_quotes && maybe_quote == '\'') || (settings.allow_double_quotes && maybe_quote == '"'))
@ -682,6 +685,42 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV &
}
else
{
/// If custom_delimiter is specified, we should read until first occurrences of
/// custom_delimiter in buffer.
if (!custom_delimiter.empty())
{
PeekableReadBuffer * peekable_buf = dynamic_cast<PeekableReadBuffer *>(&buf);
if (!peekable_buf)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Reading CSV string with custom delimiter is allowed only when using PeekableReadBuffer");
while (true)
{
if (peekable_buf->eof())
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading CSV string, expected custom delimiter \"{}\"", custom_delimiter);
char * next_pos = reinterpret_cast<char *>(memchr(peekable_buf->position(), custom_delimiter[0], peekable_buf->available()));
if (!next_pos)
next_pos = peekable_buf->buffer().end();
appendToStringOrVector(s, *peekable_buf, next_pos);
peekable_buf->position() = next_pos;
if (!buf.hasPendingData())
continue;
{
PeekableReadBufferCheckpoint checkpoint{*peekable_buf, true};
if (checkString(custom_delimiter, *peekable_buf))
return;
}
s.push_back(*peekable_buf->position());
++peekable_buf->position();
}
return;
}
/// Unquoted case. Look for delimiter or \r or \n.
while (!buf.eof())
{
@ -776,6 +815,72 @@ void readCSVField(String & s, ReadBuffer & buf, const FormatSettings::CSV & sett
s.push_back(quote);
}
void readCSVWithTwoPossibleDelimitersImpl(String & s, PeekableReadBuffer & buf, const String & first_delimiter, const String & second_delimiter)
{
/// Check that delimiters are not empty.
if (first_delimiter.empty() || second_delimiter.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read CSV field with two possible delimiters, one of delimiters '{}' and '{}' is empty", first_delimiter, second_delimiter);
/// Read all data until first_delimiter or second_delimiter
while (true)
{
if (buf.eof())
throw Exception(ErrorCodes::INCORRECT_DATA, R"(Unexpected EOF while reading CSV string, expected on of delimiters "{}" or "{}")", first_delimiter, second_delimiter);
char * next_pos = buf.position();
while (next_pos != buf.buffer().end() && *next_pos != first_delimiter[0] && *next_pos != second_delimiter[0])
++next_pos;
appendToStringOrVector(s, buf, next_pos);
buf.position() = next_pos;
if (!buf.hasPendingData())
continue;
if (*buf.position() == first_delimiter[0])
{
PeekableReadBufferCheckpoint checkpoint(buf, true);
if (checkString(first_delimiter, buf))
return;
}
if (*buf.position() == second_delimiter[0])
{
PeekableReadBufferCheckpoint checkpoint(buf, true);
if (checkString(second_delimiter, buf))
return;
}
s.push_back(*buf.position());
++buf.position();
}
}
String readCSVStringWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const FormatSettings::CSV & settings, const String & first_delimiter, const String & second_delimiter)
{
String res;
/// If value is quoted, use regular CSV reading since we need to read only data inside quotes.
if (!buf.eof() && ((settings.allow_single_quotes && *buf.position() == '\'') || (settings.allow_double_quotes && *buf.position() == '"')))
readCSVStringInto(res, buf, settings);
else
readCSVWithTwoPossibleDelimitersImpl(res, buf, first_delimiter, second_delimiter);
return res;
}
String readCSVFieldWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const FormatSettings::CSV & settings, const String & first_delimiter, const String & second_delimiter)
{
String res;
/// If value is quoted, use regular CSV reading since we need to read only data inside quotes.
if (!buf.eof() && ((settings.allow_single_quotes && *buf.position() == '\'') || (settings.allow_double_quotes && *buf.position() == '"')))
readCSVField(res, buf, settings);
else
readCSVWithTwoPossibleDelimitersImpl(res, buf, first_delimiter, second_delimiter);
return res;
}
template void readCSVStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
template void readCSVStringInto<NullOutput>(NullOutput & s, ReadBuffer & buf, const FormatSettings::CSV & settings);

View File

@ -558,9 +558,10 @@ void readStringUntilWhitespace(String & s, ReadBuffer & buf);
* - string could be placed in quotes; quotes could be single: ' if FormatSettings::CSV::allow_single_quotes is true
* or double: " if FormatSettings::CSV::allow_double_quotes is true;
* - or string could be unquoted - this is determined by first character;
* - if string is unquoted, then it is read until next delimiter,
* either until end of line (CR or LF),
* or until end of stream;
* - if string is unquoted, then:
* - If settings.custom_delimiter is not specified, it is read until next settings.delimiter, either until end of line (CR or LF) or until end of stream;
* - If settings.custom_delimiter is specified it reads until first occurrences of settings.custom_delimiter in buffer.
* This works only if provided buffer is PeekableReadBuffer.
* but spaces and tabs at begin and end of unquoted string are consumed but ignored (note that this behaviour differs from RFC).
* - if string is in quotes, then it will be read until closing quote,
* but sequences of two consecutive quotes are parsed as single quote inside string;
@ -570,6 +571,13 @@ void readCSVString(String & s, ReadBuffer & buf, const FormatSettings::CSV & set
/// Differ from readCSVString in that it doesn't remove quotes around field if any.
void readCSVField(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
/// Read string in CSV format until the first occurrence of first_delimiter or second_delimiter.
/// Similar to readCSVString if string is in quotes, we read only data in quotes.
String readCSVStringWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const FormatSettings::CSV & settings, const String & first_delimiter, const String & second_delimiter);
/// Same as above but includes quotes in the result if any.
String readCSVFieldWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const FormatSettings::CSV & settings, const String & first_delimiter, const String & second_delimiter);
/// Read and append result to array of characters.
template <typename Vector>
void readStringInto(Vector & s, ReadBuffer & buf);

View File

@ -851,8 +851,12 @@ namespace S3
quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : "");
}
bool isNotFoundError(Aws::S3::S3Errors error)
{
return error == Aws::S3::S3Errors::RESOURCE_NOT_FOUND || error == Aws::S3::S3Errors::NO_SUCH_KEY;
}
S3::ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3)
Aws::S3::Model::HeadObjectOutcome headObject(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3)
{
ProfileEvents::increment(ProfileEvents::S3HeadObject);
if (for_disk_s3)
@ -865,7 +869,12 @@ namespace S3
if (!version_id.empty())
req.SetVersionId(version_id);
Aws::S3::Model::HeadObjectOutcome outcome = client.HeadObject(req);
return client.HeadObject(req);
}
S3::ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3)
{
auto outcome = headObject(client, bucket, key, version_id, for_disk_s3);
if (outcome.IsSuccess())
{
@ -874,7 +883,10 @@ namespace S3
}
else if (throw_on_error)
{
throw DB::Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
const auto & error = outcome.GetError();
throw DB::Exception(ErrorCodes::S3_ERROR,
"Failed to HEAD object: {}. HTTP response code: {}",
error.GetMessage(), static_cast<size_t>(error.GetResponseCode()));
}
return {};
}
@ -884,6 +896,21 @@ namespace S3
return getObjectInfo(client, bucket, key, version_id, throw_on_error, for_disk_s3).size;
}
bool objectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3)
{
auto outcome = headObject(client, bucket, key, version_id, for_disk_s3);
if (outcome.IsSuccess())
return true;
const auto & error = outcome.GetError();
if (isNotFoundError(error.GetErrorType()))
return false;
throw S3Exception(error.GetErrorType(),
"Failed to check existence of key {} in bucket {}: {}",
key, bucket, error.GetMessage());
}
}
}

View File

@ -13,20 +13,17 @@
#include <base/types.h>
#include <aws/core/Aws.h>
#include <aws/core/client/ClientConfiguration.h>
#include <aws/s3/S3Client.h>
#include <aws/s3/S3Errors.h>
#include <Poco/URI.h>
#include <Common/Exception.h>
#include <Common/Throttler_fwd.h>
namespace Aws::S3
{
class S3Client;
}
namespace DB
{
namespace ErrorCodes
{
extern const int S3_ERROR;
@ -130,16 +127,22 @@ struct ObjectInfo
time_t last_modification_time = 0;
};
bool isNotFoundError(Aws::S3::S3Errors error);
Aws::S3::Model::HeadObjectOutcome headObject(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false);
S3::ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3);
size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3);
bool objectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false);
}
#endif
namespace Poco::Util
{
class AbstractConfiguration;
class AbstractConfiguration;
};
namespace DB::S3

View File

@ -179,17 +179,7 @@ void WriteBufferFromS3::finalizeImpl()
{
LOG_TRACE(log, "Checking object {} exists after upload", key);
Aws::S3::Model::HeadObjectRequest request;
request.SetBucket(bucket);
request.SetKey(key);
ProfileEvents::increment(ProfileEvents::S3HeadObject);
if (write_settings.for_object_storage)
ProfileEvents::increment(ProfileEvents::DiskS3HeadObject);
auto response = client_ptr->HeadObject(request);
auto response = S3::headObject(*client_ptr, bucket, key, "", write_settings.for_object_storage);
if (!response.IsSuccess())
throw S3Exception(fmt::format("Object {} from bucket {} disappeared immediately after upload, it's a bug in S3 or S3 API.", key, bucket), response.GetError().GetErrorType());
else

View File

@ -40,13 +40,18 @@ BlockIO InterpreterCreateRoleQuery::execute()
else
getContext()->checkAccess(AccessType::CREATE_ROLE);
if (!query.cluster.empty())
return executeDDLQueryOnCluster(query_ptr, getContext());
std::optional<SettingsProfileElements> settings_from_query;
if (query.settings)
{
settings_from_query = SettingsProfileElements{*query.settings, access_control};
if (!query.attach)
getContext()->checkSettingsConstraints(*settings_from_query);
}
if (!query.cluster.empty())
return executeDDLQueryOnCluster(query_ptr, getContext());
if (query.alter)
{
auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr

View File

@ -48,16 +48,21 @@ BlockIO InterpreterCreateSettingsProfileQuery::execute()
else
getContext()->checkAccess(AccessType::CREATE_SETTINGS_PROFILE);
std::optional<SettingsProfileElements> settings_from_query;
if (query.settings)
{
settings_from_query = SettingsProfileElements{*query.settings, access_control};
if (!query.attach)
getContext()->checkSettingsConstraints(*settings_from_query);
}
if (!query.cluster.empty())
{
query.replaceCurrentUserTag(getContext()->getUserName());
return executeDDLQueryOnCluster(query_ptr, getContext());
}
std::optional<SettingsProfileElements> settings_from_query;
if (query.settings)
settings_from_query = SettingsProfileElements{*query.settings, access_control};
std::optional<RolesOrUsersSet> roles_from_query;
if (query.to_roles)
roles_from_query = RolesOrUsersSet{*query.to_roles, access_control, getContext()->getUserID()};

View File

@ -124,12 +124,19 @@ BlockIO InterpreterCreateUserQuery::execute()
access->checkAdminOption(role);
}
}
if (!query.cluster.empty())
return executeDDLQueryOnCluster(query_ptr, getContext());
std::optional<SettingsProfileElements> settings_from_query;
if (query.settings)
{
settings_from_query = SettingsProfileElements{*query.settings, access_control};
if (!query.attach)
getContext()->checkSettingsConstraints(*settings_from_query);
}
if (!query.cluster.empty())
return executeDDLQueryOnCluster(query_ptr, getContext());
if (query.alter)
{
std::optional<RolesOrUsersSet> grantees_from_query;

View File

@ -1107,8 +1107,10 @@ void FileCache::reduceSizeToDownloaded(
file_segment->getInfoForLogUnlocked(segment_lock));
}
CreateFileSegmentSettings create_settings{ .is_persistent = file_segment->is_persistent };
cell->file_segment = std::make_shared<FileSegment>(
offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED, CreateFileSegmentSettings{});
offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED, create_settings);
assert(file_segment->reserved_size == downloaded_size);
}

View File

@ -56,6 +56,7 @@ FileSegment::FileSegment(
{
reserved_size = downloaded_size = size_;
is_downloaded = true;
chassert(std::filesystem::file_size(getPathInLocalCache()) == size_);
break;
}
case (State::SKIP_CACHE):
@ -331,6 +332,8 @@ void FileSegment::write(const char * from, size_t size, size_t offset)
cache_writer->next();
downloaded_size += size;
chassert(std::filesystem::file_size(getPathInLocalCache()) == downloaded_size);
}
catch (Exception & e)
{
@ -345,9 +348,7 @@ void FileSegment::write(const char * from, size_t size, size_t offset)
throw;
}
#ifndef NDEBUG
chassert(getFirstNonDownloadedOffset() == offset + size);
#endif
}
FileSegment::State FileSegment::wait()
@ -545,6 +546,13 @@ void FileSegment::completeBasedOnCurrentState(std::lock_guard<std::mutex> & cach
resetDownloaderUnlocked(segment_lock);
}
if (cache_writer && (is_downloader || is_last_holder))
{
cache_writer->finalize();
cache_writer.reset();
remote_file_reader.reset();
}
switch (download_state)
{
case State::SKIP_CACHE:
@ -557,8 +565,9 @@ void FileSegment::completeBasedOnCurrentState(std::lock_guard<std::mutex> & cach
case State::DOWNLOADED:
{
chassert(getDownloadedSizeUnlocked(segment_lock) == range().size());
assert(is_downloaded);
assert(!cache_writer);
chassert(getDownloadedSizeUnlocked(segment_lock) == std::filesystem::file_size(getPathInLocalCache()));
chassert(is_downloaded);
chassert(!cache_writer);
break;
}
case State::DOWNLOADING:

View File

@ -1254,6 +1254,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
if (select_query_hint && getSettingsRef().use_structure_from_insertion_table_in_table_functions == 2)
{
const auto * expression_list = select_query_hint->select()->as<ASTExpressionList>();
std::unordered_set<String> virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint();
Names columns_names;
bool have_asterisk = false;
/// First, check if we have only identifiers, asterisk and literals in select expression,
@ -1275,10 +1276,10 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
}
}
/// Check that all identifiers are column names from insertion table.
/// Check that all identifiers are column names from insertion table and not virtual column names from storage.
for (const auto & column_name : columns_names)
{
if (!structure_hint.has(column_name))
if (!structure_hint.has(column_name) || virtual_column_names.contains(column_name))
{
use_columns_from_insert_query = false;
break;
@ -1411,6 +1412,11 @@ void Context::applySettingsChanges(const SettingsChanges & changes)
}
void Context::checkSettingsConstraints(const SettingsProfileElements & profile_elements) const
{
getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, profile_elements);
}
void Context::checkSettingsConstraints(const SettingChange & change) const
{
getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, change);

View File

@ -110,6 +110,7 @@ class AccessControl;
class Credentials;
class GSSAcceptorContext;
struct SettingsConstraintsAndProfileIDs;
class SettingsProfileElements;
class RemoteHostFilter;
struct StorageID;
class IDisk;
@ -658,6 +659,7 @@ public:
void applySettingsChanges(const SettingsChanges & changes);
/// Checks the constraints.
void checkSettingsConstraints(const SettingsProfileElements & profile_elements) const;
void checkSettingsConstraints(const SettingChange & change) const;
void checkSettingsConstraints(const SettingsChanges & changes) const;
void checkSettingsConstraints(SettingsChanges & changes) const;

View File

@ -3,7 +3,7 @@
#include <Access/ContextAccess.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Storages/NamedCollectionUtils.h>
#include <Storages/NamedCollections/NamedCollectionUtils.h>
namespace DB

View File

@ -4,7 +4,7 @@
#include <Access/ContextAccess.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Storages/NamedCollectionUtils.h>
#include <Storages/NamedCollections/NamedCollectionUtils.h>
namespace DB

View File

@ -3,7 +3,7 @@
#include <Access/ContextAccess.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Storages/NamedCollectionUtils.h>
#include <Storages/NamedCollections/NamedCollectionUtils.h>
namespace DB

View File

@ -112,6 +112,11 @@ QueryPlan && InterpreterSelectQueryAnalyzer::extractQueryPlan() &&
return std::move(planner).extractQueryPlan();
}
void InterpreterSelectQueryAnalyzer::addStorageLimits(const StorageLimitsList & storage_limits)
{
planner.addStorageLimits(storage_limits);
}
void InterpreterSelectQueryAnalyzer::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr) const
{
elem.query_kind = "Select";

View File

@ -41,6 +41,8 @@ public:
QueryPlan && extractQueryPlan() &&;
void addStorageLimits(const StorageLimitsList & storage_limits);
bool supportsTransactions() const override { return true; }
bool ignoreLimits() const override { return select_query_options.ignore_limits; }

View File

@ -97,6 +97,9 @@ void TraceCollector::run()
Int64 size;
readPODBinary(size, in);
UInt64 ptr;
readPODBinary(ptr, in);
ProfileEvents::Event event;
readPODBinary(event, in);
@ -112,7 +115,7 @@ void TraceCollector::run()
UInt64 time = static_cast<UInt64>(ts.tv_sec * 1000000000LL + ts.tv_nsec);
UInt64 time_in_microseconds = static_cast<UInt64>((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000));
TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, event, increment};
TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, ptr, event, increment};
trace_log->add(element);
}
}

View File

@ -38,6 +38,7 @@ NamesAndTypesList TraceLogElement::getNamesAndTypes()
{"query_id", std::make_shared<DataTypeString>()},
{"trace", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
{"size", std::make_shared<DataTypeInt64>()},
{"ptr", std::make_shared<DataTypeUInt64>()},
{"event", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"increment", std::make_shared<DataTypeInt64>()},
};
@ -57,6 +58,7 @@ void TraceLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insertData(query_id.data(), query_id.size());
columns[i++]->insert(trace);
columns[i++]->insert(size);
columns[i++]->insert(ptr);
String event_name;
if (event != ProfileEvents::end())

View File

@ -27,8 +27,10 @@ struct TraceLogElement
UInt64 thread_id{};
String query_id{};
Array trace{};
/// Allocation size in bytes for TraceType::Memory.
/// Allocation size in bytes for TraceType::Memory and TraceType::MemorySample.
Int64 size{};
/// Allocation ptr for TraceType::MemorySample.
UInt64 ptr{};
/// ProfileEvent for TraceType::ProfileEvent.
ProfileEvents::Event event{ProfileEvents::end()};
/// Increment of profile event for TraceType::ProfileEvent.

View File

@ -80,7 +80,7 @@ void complete(const DB::FileSegmentsHolder & holder)
{
ASSERT_TRUE(file_segment->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(file_segment);
file_segment->completeWithState(DB::FileSegment::State::DOWNLOADED);
file_segment->completeWithoutState();
}
}
@ -127,7 +127,7 @@ TEST(FileCache, get)
assertRange(2, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADING);
download(segments[0]);
segments[0]->completeWithState(DB::FileSegment::State::DOWNLOADED);
segments[0]->completeWithoutState();
assertRange(3, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
}
@ -148,7 +148,7 @@ TEST(FileCache, get)
ASSERT_TRUE(segments[1]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(segments[1]);
segments[1]->completeWithState(DB::FileSegment::State::DOWNLOADED);
segments[1]->completeWithoutState();
assertRange(6, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
}
@ -205,7 +205,7 @@ TEST(FileCache, get)
ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(segments[2]);
segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED);
segments[2]->completeWithoutState();
assertRange(14, segments[3], DB::FileSegment::Range(17, 20), DB::FileSegment::State::DOWNLOADED);
@ -246,7 +246,7 @@ TEST(FileCache, get)
ASSERT_TRUE(segments[3]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(segments[3]);
segments[3]->completeWithState(DB::FileSegment::State::DOWNLOADED);
segments[3]->completeWithoutState();
ASSERT_TRUE(segments[3]->state() == DB::FileSegment::State::DOWNLOADED);
}
@ -269,8 +269,8 @@ TEST(FileCache, get)
ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(segments[0]);
prepareAndDownload(segments[2]);
segments[0]->completeWithState(DB::FileSegment::State::DOWNLOADED);
segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED);
segments[0]->completeWithoutState();
segments[2]->completeWithoutState();
}
/// Current cache: [____][_] [][___][__]
@ -292,8 +292,8 @@ TEST(FileCache, get)
ASSERT_TRUE(s1[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(s5[0]);
prepareAndDownload(s1[0]);
s5[0]->completeWithState(DB::FileSegment::State::DOWNLOADED);
s1[0]->completeWithState(DB::FileSegment::State::DOWNLOADED);
s5[0]->completeWithoutState();
s1[0]->completeWithoutState();
/// Current cache: [___] [_][___][_] [__]
/// ^ ^ ^ ^ ^ ^ ^ ^
@ -395,7 +395,7 @@ TEST(FileCache, get)
}
prepareAndDownload(segments[2]);
segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED);
segments[2]->completeWithoutState();
ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADED);
other_1.join();
@ -460,7 +460,7 @@ TEST(FileCache, get)
ASSERT_TRUE(segments_2[1]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(segments_2[1]);
segments_2[1]->completeWithState(DB::FileSegment::State::DOWNLOADED);
segments_2[1]->completeWithoutState();
});
{

View File

@ -1,44 +0,0 @@
#include "threadPoolCallbackRunner.h"
#include <Common/scope_guard_safe.h>
#include <Common/CurrentThread.h>
#include <Common/setThreadName.h>
#include <IO/AsynchronousReader.h>
#include <future>
namespace DB
{
template <typename Result> ThreadPoolCallbackRunner<Result> threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name)
{
return [pool = &pool, thread_group = CurrentThread::getGroup(), thread_name](std::function<Result()> && callback, size_t priority) mutable -> std::future<Result>
{
auto task = std::make_shared<std::packaged_task<Result()>>([thread_group, thread_name, callback = std::move(callback)]() -> Result
{
if (thread_group)
CurrentThread::attachTo(thread_group);
SCOPE_EXIT_SAFE({
if (thread_group)
CurrentThread::detachQueryIfNotDetached();
});
setThreadName(thread_name.data());
return callback();
});
auto future = task->get_future();
/// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority".
pool->scheduleOrThrow([task]{ (*task)(); }, -priority);
return future;
};
}
template ThreadPoolCallbackRunner<void> threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name);
template ThreadPoolCallbackRunner<IAsynchronousReader::Result> threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name);
}

View File

@ -1,9 +1,11 @@
#pragma once
#include <Common/ThreadPool.h>
#include <Common/scope_guard_safe.h>
#include <Common/CurrentThread.h>
#include <Common/setThreadName.h>
#include <future>
namespace DB
{
@ -13,6 +15,32 @@ using ThreadPoolCallbackRunner = std::function<std::future<Result>(std::function
/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()'.
template <typename Result>
ThreadPoolCallbackRunner<Result> threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name);
ThreadPoolCallbackRunner<Result> threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name)
{
return [pool = &pool, thread_group = CurrentThread::getGroup(), thread_name](std::function<Result()> && callback, size_t priority) mutable -> std::future<Result>
{
auto task = std::make_shared<std::packaged_task<Result()>>([thread_group, thread_name, callback = std::move(callback)]() -> Result
{
if (thread_group)
CurrentThread::attachTo(thread_group);
SCOPE_EXIT_SAFE({
if (thread_group)
CurrentThread::detachQueryIfNotDetached();
});
setThreadName(thread_name.data());
return callback();
});
auto future = task->get_future();
/// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority".
pool->scheduleOrThrow([task]{ (*task)(); }, -priority);
return future;
};
}
}

View File

@ -365,9 +365,9 @@ void Planner::buildQueryPlanIfNeeded()
select_query_info.query = select_query_info.original_query;
select_query_info.planner_context = planner_context;
StorageLimitsList storage_limits;
storage_limits.push_back(buildStorageLimits(*query_context, select_query_options));
select_query_info.storage_limits = std::make_shared<StorageLimitsList>(storage_limits);
auto current_storage_limits = storage_limits;
current_storage_limits.push_back(buildStorageLimits(*query_context, select_query_options));
select_query_info.storage_limits = std::make_shared<StorageLimitsList>(std::move(current_storage_limits));
collectTableExpressionData(query_tree, *planner_context);
checkStoragesSupportTransactions(planner_context);
@ -847,4 +847,10 @@ void Planner::buildQueryPlanIfNeeded()
extendQueryContextAndStoragesLifetime(query_plan, planner_context);
}
void Planner::addStorageLimits(const StorageLimitsList & limits)
{
for (const auto & limit : limits)
storage_limits.push_back(limit);
}
}

View File

@ -45,11 +45,14 @@ public:
return std::move(query_plan);
}
void addStorageLimits(const StorageLimitsList & limits);
private:
QueryTreeNodePtr query_tree;
QueryPlan query_plan;
SelectQueryOptions select_query_options;
PlannerContextPtr planner_context;
StorageLimitsList storage_limits;
};
}

View File

@ -12,16 +12,6 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
static FormatSettings updateFormatSettings(const FormatSettings & settings)
{
if (settings.custom.escaping_rule != FormatSettings::EscapingRule::CSV || settings.custom.field_delimiter.empty())
return settings;
auto updated = settings;
updated.csv.delimiter = settings.custom.field_delimiter.front();
return updated;
}
CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat(
const Block & header_,
ReadBuffer & in_buf_,
@ -31,7 +21,7 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat(
bool ignore_spaces_,
const FormatSettings & format_settings_)
: CustomSeparatedRowInputFormat(
header_, std::make_unique<PeekableReadBuffer>(in_buf_), params_, with_names_, with_types_, ignore_spaces_, updateFormatSettings(format_settings_))
header_, std::make_unique<PeekableReadBuffer>(in_buf_), params_, with_names_, with_types_, ignore_spaces_, format_settings_)
{
}
@ -171,15 +161,31 @@ bool CustomSeparatedFormatReader::checkEndOfRow()
}
template <bool is_header>
String CustomSeparatedFormatReader::readFieldIntoString(bool is_first)
String CustomSeparatedFormatReader::readFieldIntoString(bool is_first, bool is_last, bool is_unknown)
{
if (!is_first)
skipFieldDelimiter();
skipSpaces();
updateFormatSettings(is_last);
if constexpr (is_header)
{
/// If the number of columns is unknown and we use CSV escaping rule,
/// we don't know what delimiter to expect after the value,
/// so we should read until we meet field_delimiter or row_after_delimiter.
if (is_unknown && format_settings.custom.escaping_rule == FormatSettings::EscapingRule::CSV)
return readCSVStringWithTwoPossibleDelimiters(
*buf, format_settings.csv, format_settings.custom.field_delimiter, format_settings.custom.row_after_delimiter);
return readStringByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings);
}
else
{
if (is_unknown && format_settings.custom.escaping_rule == FormatSettings::EscapingRule::CSV)
return readCSVFieldWithTwoPossibleDelimiters(
*buf, format_settings.csv, format_settings.custom.field_delimiter, format_settings.custom.row_after_delimiter);
return readFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings);
}
}
template <bool is_header>
@ -192,14 +198,14 @@ std::vector<String> CustomSeparatedFormatReader::readRowImpl()
{
do
{
values.push_back(readFieldIntoString<is_header>(values.empty()));
values.push_back(readFieldIntoString<is_header>(values.empty(), false, true));
} while (!checkEndOfRow());
columns = values.size();
}
else
{
for (size_t i = 0; i != columns; ++i)
values.push_back(readFieldIntoString<is_header>(i == 0));
values.push_back(readFieldIntoString<is_header>(i == 0, i + 1 == columns, false));
}
skipRowEndDelimiter();
@ -223,9 +229,41 @@ void CustomSeparatedFormatReader::skipHeaderRow()
skipRowEndDelimiter();
}
bool CustomSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool, const String &)
void CustomSeparatedFormatReader::updateFormatSettings(bool is_last_column)
{
if (format_settings.custom.escaping_rule != FormatSettings::EscapingRule::CSV)
return;
/// Clean custom delimiter from previous delimiter.
format_settings.csv.custom_delimiter.clear();
/// If delimiter has length = 1, it will be more efficient to use csv.delimiter.
/// If we have some complex delimiter, normal CSV reading will now work properly if we will
/// use just the first character of delimiter (for example, if delimiter='||' and we have data 'abc|d||')
/// We have special implementation for such case that uses custom delimiter, it's not so efficient,
/// but works properly.
if (is_last_column)
{
/// If field delimiter has length = 1, it will be more efficient to use csv.delimiter.
if (format_settings.custom.row_after_delimiter.size() == 1)
format_settings.csv.delimiter = format_settings.custom.row_after_delimiter.front();
else
format_settings.csv.custom_delimiter = format_settings.custom.row_after_delimiter;
}
else
{
if (format_settings.custom.field_delimiter.size() == 1)
format_settings.csv.delimiter = format_settings.custom.field_delimiter.front();
else
format_settings.csv.custom_delimiter = format_settings.custom.field_delimiter;
}
}
bool CustomSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String &)
{
skipSpaces();
updateFormatSettings(is_last_file_column);
return deserializeFieldByEscapingRule(type, serialization, column, *buf, format_settings.custom.escaping_rule, format_settings);
}
@ -237,6 +275,8 @@ bool CustomSeparatedFormatReader::checkForSuffixImpl(bool check_eof)
if (!check_eof)
return false;
/// Allow optional \n before eof.
checkChar('\n', *buf);
return buf->eof();
}
@ -246,6 +286,8 @@ bool CustomSeparatedFormatReader::checkForSuffixImpl(bool check_eof)
if (!check_eof)
return true;
/// Allow optional \n before eof.
checkChar('\n', *buf);
if (buf->eof())
return true;
}
@ -312,7 +354,7 @@ CustomSeparatedSchemaReader::CustomSeparatedSchemaReader(
&reader,
getDefaultDataTypeForEscapingRule(format_setting_.custom.escaping_rule))
, buf(in_)
, reader(buf, ignore_spaces_, updateFormatSettings(format_setting_))
, reader(buf, ignore_spaces_, format_setting_)
{
}

View File

@ -83,7 +83,9 @@ private:
std::vector<String> readRowImpl();
template <bool read_string>
String readFieldIntoString(bool is_first);
String readFieldIntoString(bool is_first, bool is_last, bool is_unknown);
void updateFormatSettings(bool is_last_column);
PeekableReadBuffer * buf;
bool ignore_spaces;

View File

@ -25,6 +25,27 @@ namespace ErrorCodes
ErrorCodes::CANNOT_READ_ALL_DATA);
}
static void updateFormatSettingsIfNeeded(FormatSettings::EscapingRule escaping_rule, FormatSettings & settings, const ParsedTemplateFormatString & row_format, char default_csv_delimiter, size_t file_column)
{
if (escaping_rule != FormatSettings::EscapingRule::CSV)
return;
/// Clean custom_delimiter from previous column.
settings.csv.custom_delimiter.clear();
/// If field delimiter is empty, we read until default csv delimiter.
if (row_format.delimiters[file_column + 1].empty())
settings.csv.delimiter = default_csv_delimiter;
/// If field delimiter has length = 1, it will be more efficient to use csv.delimiter.
else if (row_format.delimiters[file_column + 1].size() == 1)
settings.csv.delimiter = row_format.delimiters[file_column + 1].front();
/// If we have some complex delimiter, normal CSV reading will now work properly if we will
/// use the first character of delimiter (for example, if delimiter='||' and we have data 'abc|d||')
/// We have special implementation for such case that uses custom delimiter, it's not so efficient,
/// but works properly.
else
settings.csv.custom_delimiter = row_format.delimiters[file_column + 1];
}
TemplateRowInputFormat::TemplateRowInputFormat(
const Block & header_,
ReadBuffer & in_,
@ -129,10 +150,8 @@ bool TemplateRowInputFormat::deserializeField(const DataTypePtr & type,
const SerializationPtr & serialization, IColumn & column, size_t file_column)
{
EscapingRule escaping_rule = row_format.escaping_rules[file_column];
if (escaping_rule == EscapingRule::CSV)
/// Will read unquoted string until settings.csv.delimiter
settings.csv.delimiter = row_format.delimiters[file_column + 1].empty() ? default_csv_delimiter :
row_format.delimiters[file_column + 1].front();
updateFormatSettingsIfNeeded(escaping_rule, settings, row_format, default_csv_delimiter, file_column);
try
{
return deserializeFieldByEscapingRule(type, serialization, column, *buf, escaping_rule, settings);
@ -466,6 +485,7 @@ TemplateSchemaReader::TemplateSchemaReader(
, format(format_)
, row_format(row_format_)
, format_reader(buf, ignore_spaces_, format, row_format, row_between_delimiter, format_settings)
, default_csv_delimiter(format_settings_.csv.delimiter)
{
setColumnNames(row_format.column_names);
}
@ -489,9 +509,7 @@ DataTypes TemplateSchemaReader::readRowAndGetDataTypes()
for (size_t i = 0; i != row_format.columnsCount(); ++i)
{
format_reader.skipDelimiter(i);
if (row_format.escaping_rules[i] == FormatSettings::EscapingRule::CSV)
format_settings.csv.delimiter = row_format.delimiters[i + 1].empty() ? format_settings.csv.delimiter : row_format.delimiters[i + 1].front();
updateFormatSettingsIfNeeded(row_format.escaping_rules[i], format_settings, row_format, default_csv_delimiter, i);
field = readFieldByEscapingRule(buf, row_format.escaping_rules[i], format_settings);
data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i]));
}

View File

@ -128,6 +128,7 @@ private:
const ParsedTemplateFormatString row_format;
TemplateFormatReader format_reader;
bool first_row = true;
const char default_csv_delimiter;
};
bool parseDelimiterWithDiagnosticInfo(WriteBuffer & out, ReadBuffer & buf, const String & delimiter, const String & description, bool skip_spaces);

View File

@ -111,7 +111,7 @@ public:
protected:
ReadBuffer * in;
const FormatSettings format_settings;
FormatSettings format_settings;
};
/// Base class for schema inference for formats with -WithNames and -WithNamesAndTypes suffixes.

View File

@ -737,14 +737,20 @@ void TCPHandler::processOrdinaryQueryWithProcessors()
auto & pipeline = state.io.pipeline;
if (query_context->getSettingsRef().allow_experimental_query_deduplication)
{
std::lock_guard lock(task_callback_mutex);
sendPartUUIDs();
}
/// Send header-block, to allow client to prepare output format for data to send.
{
const auto & header = pipeline.getHeader();
if (header)
{
std::lock_guard lock(task_callback_mutex);
sendData(header);
}
}
{

View File

@ -46,6 +46,17 @@ class MarkCache;
class UncompressedCache;
class MergeTreeTransaction;
enum class DataPartRemovalState
{
NOT_ATTEMPTED,
VISIBLE_TO_TRANSACTIONS,
NON_UNIQUE_OWNERSHIP,
NOT_REACHED_REMOVAL_TIME,
HAS_SKIPPED_MUTATION_PARENT,
REMOVED,
};
/// Description of the data part.
class IMergeTreeDataPart : public std::enable_shared_from_this<IMergeTreeDataPart>, public DataPartStorageHolder
{
@ -446,6 +457,10 @@ public:
void removeDeleteOnDestroyMarker();
void removeVersionMetadata();
mutable std::atomic<DataPartRemovalState> removal_state = DataPartRemovalState::NOT_ATTEMPTED;
mutable std::atomic<time_t> last_removal_attemp_time = 0;
protected:
/// Total size of all columns, calculated once in calcuateColumnSizesOnDisk

View File

@ -88,6 +88,10 @@ MergeListElement::MergeListElement(
/// thread_group::memory_tracker, but MemoryTrackerThreadSwitcher will reset parent).
memory_tracker.setProfilerStep(settings.memory_profiler_step);
memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability);
/// Specify sample probability also for current thread to track more deallocations.
if (auto * thread_memory_tracker = DB::CurrentThread::getMemoryTracker())
thread_memory_tracker->setSampleProbability(settings.memory_profiler_sample_probability);
memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator);
if (settings.memory_tracker_fault_probability > 0.0)
memory_tracker.setFaultProbability(settings.memory_tracker_fault_probability);

View File

@ -84,6 +84,7 @@
#include <base/sort.h>
#include <algorithm>
#include <atomic>
#include <iomanip>
#include <optional>
#include <set>
@ -1762,9 +1763,12 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force)
{
const DataPartPtr & part = *it;
part->last_removal_attemp_time.store(time_now, std::memory_order_relaxed);
/// Do not remove outdated part if it may be visible for some transaction
if (!part->version.canBeRemoved())
{
part->removal_state.store(DataPartRemovalState::VISIBLE_TO_TRANSACTIONS, std::memory_order_relaxed);
skipped_parts.push_back(part->info);
continue;
}
@ -1772,20 +1776,27 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force)
/// Grab only parts that are not used by anyone (SELECTs for example).
if (!part.unique())
{
part->removal_state.store(DataPartRemovalState::NON_UNIQUE_OWNERSHIP, std::memory_order_relaxed);
skipped_parts.push_back(part->info);
continue;
}
auto part_remove_time = part->remove_time.load(std::memory_order_relaxed);
if ((part_remove_time < time_now && time_now - part_remove_time > getSettings()->old_parts_lifetime.totalSeconds() && !has_skipped_mutation_parent(part))
bool reached_removal_time = part_remove_time < time_now && time_now - part_remove_time > getSettings()->old_parts_lifetime.totalSeconds();
if ((reached_removal_time && !has_skipped_mutation_parent(part))
|| force
|| isInMemoryPart(part) /// Remove in-memory parts immediately to not store excessive data in RAM
|| (part->version.creation_csn == Tx::RolledBackCSN && getSettings()->remove_rolled_back_parts_immediately))
{
part->removal_state.store(DataPartRemovalState::REMOVED, std::memory_order_relaxed);
parts_to_delete.emplace_back(it);
}
else
{
if (!reached_removal_time)
part->removal_state.store(DataPartRemovalState::NOT_REACHED_REMOVAL_TIME, std::memory_order_relaxed);
else
part->removal_state.store(DataPartRemovalState::HAS_SKIPPED_MUTATION_PARENT, std::memory_order_relaxed);
skipped_parts.push_back(part->info);
continue;
}

View File

@ -172,7 +172,7 @@ ColumnWithTypeAndName RPNBuilderTreeNode::getConstantColumn() const
if (ast_node)
{
const auto * literal = assert_cast<const ASTLiteral *>(ast_node);
const auto * literal = typeid_cast<const ASTLiteral *>(ast_node);
if (literal)
{
result.type = applyVisitor(FieldToDataType(), literal->value);

Some files were not shown because too many files have changed in this diff Show More