Merge branch 'master' into 42648_Support_scalar_subqueries_cache

This commit is contained in:
Smita Kulkarni 2022-12-22 18:38:41 +01:00
commit e6ab6bda92
467 changed files with 12587 additions and 6045 deletions

View File

@ -7,6 +7,6 @@ assignees: ''
---
> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in [community Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
> If you still prefer GitHub issues, remove all this text and ask your question here.

View File

@ -16,6 +16,7 @@ on: # yamllint disable-line rule:truthy
- 'docker/docs/**'
- 'docs/**'
- 'website/**'
- 'utils/check-style/aspell-ignore/**'
jobs:
CheckLabels:
runs-on: [self-hosted, style-checker]

View File

@ -17,6 +17,7 @@ concurrency:
- 'docs/**'
- 'utils/list-versions/version_date.tsv'
- 'website/**'
- 'utils/check-style/aspell-ignore/**'
workflow_dispatch:
jobs:
DockerHubPushAarch64:

View File

@ -842,7 +842,7 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinAmd64SSE2:
BuilderBinAmd64Compat:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
@ -853,7 +853,7 @@ jobs:
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_amd64sse2
BUILD_NAME=binary_amd64_compat
EOF
- name: Download changed images
uses: actions/download-artifact@v2
@ -1017,7 +1017,7 @@ jobs:
- BuilderBinFreeBSD
# - BuilderBinGCC
- BuilderBinPPC64
- BuilderBinAmd64SSE2
- BuilderBinAmd64Compat
- BuilderBinAarch64V80Compat
- BuilderBinClangTidy
- BuilderDebShared

View File

@ -16,6 +16,7 @@ on: # yamllint disable-line rule:truthy
- 'docker/docs/**'
- 'docs/**'
- 'website/**'
- 'utils/check-style/aspell-ignore/**'
##########################################################################################
##################################### SMALL CHECKS #######################################
##########################################################################################
@ -900,7 +901,7 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinAmd64SSE2:
BuilderBinAmd64Compat:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
@ -911,7 +912,7 @@ jobs:
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_amd64sse2
BUILD_NAME=binary_amd64_compat
EOF
- name: Download changed images
uses: actions/download-artifact@v2
@ -1070,7 +1071,7 @@ jobs:
- BuilderBinFreeBSD
# - BuilderBinGCC
- BuilderBinPPC64
- BuilderBinAmd64SSE2
- BuilderBinAmd64Compat
- BuilderBinAarch64V80Compat
- BuilderBinClangTidy
- BuilderDebShared

6
.gitmodules vendored
View File

@ -269,9 +269,6 @@
[submodule "contrib/vectorscan"]
path = contrib/vectorscan
url = https://github.com/VectorCamp/vectorscan.git
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing.git
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/ClickHouse/c-ares
@ -294,3 +291,6 @@
[submodule "contrib/google-benchmark"]
path = contrib/google-benchmark
url = https://github.com/google/benchmark.git
[submodule "contrib/libdivide"]
path = contrib/libdivide
url = https://github.com/ridiculousfish/libdivide.git

View File

@ -17,14 +17,16 @@
### <a id="2212"></a> ClickHouse release 22.12, 2022-12-15
#### Backward Incompatible Change
* Add `GROUP BY ALL` syntax: [#37631](https://github.com/ClickHouse/ClickHouse/issues/37631). [#42265](https://github.com/ClickHouse/ClickHouse/pull/42265) ([刘陶峰](https://github.com/taofengliu)). If you have a column or an alias named `all` and doing `GROUP BY all` without the intention to group by all the columns, the query will have a different semantic. To keep the old semantic, put `all` into backticks or double quotes `"all"` to make it an identifier instead of a keyword.
#### Upgrade Notes
* Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Alexander Tokmakov](https://github.com/tavplubix), [Raúl Marín](https://github.com/Algunenano)). Note: all the official ClickHouse builds already include the patches. This is not necessarily true for unofficial third-party builds that should be avoided.
* Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend upgrading from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append an extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then the incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Alexander Tokmakov](https://github.com/tavplubix), [Raúl Marín](https://github.com/Algunenano)). Note: all the official ClickHouse builds already include the patches. This is not necessarily true for unofficial third-party builds that should be avoided.
#### New Feature
* Add `BSONEachRow` input/output format. In this format, ClickHouse formats/parses each row as a separate BSON document and each column is formatted/parsed as a single BSON field with column name as a key. [#42033](https://github.com/ClickHouse/ClickHouse/pull/42033) ([mark-polokhov](https://github.com/mark-polokhov)).
* Add `BSONEachRow` input/output format. In this format, ClickHouse formats/parses each row as a separate BSON document and each column is formatted/parsed as a single BSON field with the column name as the key. [#42033](https://github.com/ClickHouse/ClickHouse/pull/42033) ([mark-polokhov](https://github.com/mark-polokhov)).
* Add `grace_hash` JOIN algorithm, it can be enabled with `SET join_algorithm = 'grace_hash'`. [#38191](https://github.com/ClickHouse/ClickHouse/pull/38191) ([BigRedEye](https://github.com/BigRedEye), [Vladimir C](https://github.com/vdimir)).
* Allow configuring password complexity rules and checks for creating and changing users. [#43719](https://github.com/ClickHouse/ClickHouse/pull/43719) ([Nikolay Degterinsky](https://github.com/evillique)).
* Add `CREATE / ALTER / DROP NAMED COLLECTION` queries. [#43252](https://github.com/ClickHouse/ClickHouse/pull/43252) ([Kseniia Sumarokova](https://github.com/kssenii)). Restrict default access to named collections for user defined in config. It must have explicit `show_named_collections = 1` to be able to see them. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)). The `system.named_collections` table is introduced [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Mask sensitive information in logs; mask secret parts in the output of queries `SHOW CREATE TABLE` and `SELECT FROM system.tables`. Also resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#43227](https://github.com/ClickHouse/ClickHouse/pull/43227) ([Vitaly Baranov](https://github.com/vitlibar)).
* Add `GROUP BY ALL` syntax: [#37631](https://github.com/ClickHouse/ClickHouse/issues/37631). [#42265](https://github.com/ClickHouse/ClickHouse/pull/42265) ([刘陶峰](https://github.com/taofengliu)).
* Add `FROM table SELECT column` syntax. [#41095](https://github.com/ClickHouse/ClickHouse/pull/41095) ([Nikolay Degterinsky](https://github.com/evillique)).
@ -32,23 +34,24 @@
* Added `multiplyDecimal` and `divideDecimal` functions for decimal operations with fixed precision. [#42438](https://github.com/ClickHouse/ClickHouse/pull/42438) ([Andrey Zvonov](https://github.com/zvonand)).
* Added `system.moves` table with list of currently moving parts. [#42660](https://github.com/ClickHouse/ClickHouse/pull/42660) ([Sergei Trifonov](https://github.com/serxa)).
* Add support for embedded Prometheus endpoint for ClickHouse Keeper. [#43087](https://github.com/ClickHouse/ClickHouse/pull/43087) ([Antonio Andelic](https://github.com/antonio2368)).
* Support numeric literals with `_` as separator as, for example, `1_000_000`. [#43925](https://github.com/ClickHouse/ClickHouse/pull/43925) ([jh0x](https://github.com/jh0x)).
* Added possibility to use array as a second parameter for `cutURLParameter` function. It will cut multiple parameters. Close [#6827](https://github.com/ClickHouse/ClickHouse/issues/6827). [#43788](https://github.com/ClickHouse/ClickHouse/pull/43788) ([Roman Vasin](https://github.com/rvasin)).
* Support numeric literals with `_` as the separator, for example, `1_000_000`. [#43925](https://github.com/ClickHouse/ClickHouse/pull/43925) ([jh0x](https://github.com/jh0x)).
* Added possibility to use an array as a second parameter for `cutURLParameter` function. It will cut multiple parameters. Close [#6827](https://github.com/ClickHouse/ClickHouse/issues/6827). [#43788](https://github.com/ClickHouse/ClickHouse/pull/43788) ([Roman Vasin](https://github.com/rvasin)).
* Add a column with the expression of the index in the `system.data_skipping_indices` table. [#43308](https://github.com/ClickHouse/ClickHouse/pull/43308) ([Guillaume Tassery](https://github.com/YiuRULE)).
* Add column `engine_full` to system table `databases` so that users can access whole engine definition of database via system tables. [#43468](https://github.com/ClickHouse/ClickHouse/pull/43468) ([凌涛](https://github.com/lingtaolf)).
* New hash function [xxh3](https://github.com/Cyan4973/xxHash) added. Also performance of `xxHash32` and `xxHash64` improved on arm thanks to library update. [#43411](https://github.com/ClickHouse/ClickHouse/pull/43411) ([Nikita Taranov](https://github.com/nickitat)).
* Add column `engine_full` to system table `databases` so that users can access the entire engine definition of a database via system tables. [#43468](https://github.com/ClickHouse/ClickHouse/pull/43468) ([凌涛](https://github.com/lingtaolf)).
* New hash function [xxh3](https://github.com/Cyan4973/xxHash) added. Also, the performance of `xxHash32` and `xxHash64` are improved on ARM thanks to a library update. [#43411](https://github.com/ClickHouse/ClickHouse/pull/43411) ([Nikita Taranov](https://github.com/nickitat)).
* Added support to define constraints for merge tree settings. For example you can forbid overriding the `storage_policy` by users. [#43903](https://github.com/ClickHouse/ClickHouse/pull/43903) ([Sergei Trifonov](https://github.com/serxa)).
* Add a new setting `input_format_json_read_objects_as_strings` that allows to parse nested JSON objects into Strings in all JSON input formats. This setting is disabled by default. [#44052](https://github.com/ClickHouse/ClickHouse/pull/44052) ([Kruglov Pavel](https://github.com/Avogar)).
* Add a new setting `input_format_json_read_objects_as_strings` that allows the parsing of nested JSON objects into Strings in all JSON input formats. This setting is disabled by default. [#44052](https://github.com/ClickHouse/ClickHouse/pull/44052) ([Kruglov Pavel](https://github.com/Avogar)).
#### Experimental Feature
* Support deduplication for asynchronous inserts. Before this change async inserts don't support deduplication, because multiple small inserts will coexist in one inserted batch. Closes [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075). [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304) ([Han Fei](https://github.com/hanfei1991)).
* Support deduplication for asynchronous inserts. Before this change, async inserts did not support deduplication, because multiple small inserts coexisted in one inserted batch. Closes [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075). [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304) ([Han Fei](https://github.com/hanfei1991)).
* Add support for cosine distance for the experimental Annoy (vector similarity search) index. [#42778](https://github.com/ClickHouse/ClickHouse/pull/42778) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
* Add `CREATE / ALTER / DROP NAMED COLLECTION` queries. [#43252](https://github.com/ClickHouse/ClickHouse/pull/43252) ([Kseniia Sumarokova](https://github.com/kssenii)). This feature is under development and the queries are not effective as of version 22.12. This changelog entry is added only to avoid confusion. Restrict default access to named collections to the user defined in config. This requires that `show_named_collections = 1` is set to be able to see them. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)). The `system.named_collections` table is introduced [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### Performance Improvement
* Add settings `max_streams_for_merge_tree_reading` and `allow_asynchronous_read_from_io_pool_for_merge_tree`. Setting `max_streams_for_merge_tree_reading` limits the number of reading streams for MergeTree tables. Setting `allow_asynchronous_read_from_io_pool_for_merge_tree` enables background I/O pool to read from `MergeTree` tables. This may increase performance for I/O bound queries if used together with `max_streams_to_max_threads_ratio` or `max_streams_for_merge_tree_reading`. [#43260](https://github.com/ClickHouse/ClickHouse/pull/43260) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). This improves performance up to 100 times in case of high latency storage, low number of CPU and high number of data parts.
* Settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem/merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem` did not respect adaptive granularity. Fat rows did not decrease the number of read rows (as it is was done for `merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read`, which could lead to high memory usage when using remote filesystems. [#43965](https://github.com/ClickHouse/ClickHouse/pull/43965) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Optimized number of list requests to ZooKeeper or Keeper when selecting a part to merge. Previously it could produce thousands of requests in some cases. Fixes [#43647](https://github.com/ClickHouse/ClickHouse/issues/43647). [#43675](https://github.com/ClickHouse/ClickHouse/pull/43675) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Optimisation is getting skipped now if `max_size_to_preallocate_for_aggregation` has too small value. Default value of this setting increased to `10^8`. [#43945](https://github.com/ClickHouse/ClickHouse/pull/43945) ([Nikita Taranov](https://github.com/nickitat)).
* Add settings `max_streams_for_merge_tree_reading` and `allow_asynchronous_read_from_io_pool_for_merge_tree`. Setting `max_streams_for_merge_tree_reading` limits the number of reading streams for MergeTree tables. Setting `allow_asynchronous_read_from_io_pool_for_merge_tree` enables a background I/O pool to read from `MergeTree` tables. This may increase performance for I/O bound queries if used together with `max_streams_to_max_threads_ratio` or `max_streams_for_merge_tree_reading`. [#43260](https://github.com/ClickHouse/ClickHouse/pull/43260) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). This improves performance up to 100 times in case of high latency storage, low number of CPU and high number of data parts.
* Settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem/merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem` did not respect adaptive granularity. Fat rows did not decrease the number of read rows (as it was done for `merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read`, which could lead to high memory usage when using remote filesystems. [#43965](https://github.com/ClickHouse/ClickHouse/pull/43965) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Optimized the number of list requests to ZooKeeper or ClickHouse Keeper when selecting a part to merge. Previously it could produce thousands of requests in some cases. Fixes [#43647](https://github.com/ClickHouse/ClickHouse/issues/43647). [#43675](https://github.com/ClickHouse/ClickHouse/pull/43675) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Optimization is getting skipped now if `max_size_to_preallocate_for_aggregation` has too small a value. The default value of this setting increased to `10^8`. [#43945](https://github.com/ClickHouse/ClickHouse/pull/43945) ([Nikita Taranov](https://github.com/nickitat)).
* Speed-up server shutdown by avoiding cleaning up of old data parts. Because it is unnecessary after https://github.com/ClickHouse/ClickHouse/pull/41145. [#43760](https://github.com/ClickHouse/ClickHouse/pull/43760) ([Sema Checherinda](https://github.com/CheSema)).
* Merging on initiator now uses the same memory bound approach as merging of local aggregation results if `enable_memory_bound_merging_of_aggregation_results` is set. [#40879](https://github.com/ClickHouse/ClickHouse/pull/40879) ([Nikita Taranov](https://github.com/nickitat)).
* Keeper improvement: try syncing logs to disk in parallel with replication. [#43450](https://github.com/ClickHouse/ClickHouse/pull/43450) ([Antonio Andelic](https://github.com/antonio2368)).
@ -56,25 +59,24 @@
#### Improvement
* Implement referential dependencies and use them to create tables in the correct order while restoring from a backup. [#43834](https://github.com/ClickHouse/ClickHouse/pull/43834) ([Vitaly Baranov](https://github.com/vitlibar)).
* Substitute UDFs in `CREATE` query to avoid failures during loading at the startup. Additionally, UDFs can now be used as `DEFAULT` expressions for columns. [#43539](https://github.com/ClickHouse/ClickHouse/pull/43539) ([Antonio Andelic](https://github.com/antonio2368)).
* Change how the followed queries delete parts: TRUNCATE TABLE, ALTER TABLE DROP PART, ALTER TABLE DROP PARTITION. Now these queries make empty parts which cover old parts. This makes TRUNCATE query works without exclusive lock which means concurrent reads aren't locked. Also achieved durability in all those queries. If request is succeeded then no resurrected pars appear later. Note that atomicity is achieved only with transaction scope. [#41145](https://github.com/ClickHouse/ClickHouse/pull/41145) ([Sema Checherinda](https://github.com/CheSema)).
* Substitute UDFs in `CREATE` query to avoid failures during loading at startup. Additionally, UDFs can now be used as `DEFAULT` expressions for columns. [#43539](https://github.com/ClickHouse/ClickHouse/pull/43539) ([Antonio Andelic](https://github.com/antonio2368)).
* Change how the following queries delete parts: TRUNCATE TABLE, ALTER TABLE DROP PART, ALTER TABLE DROP PARTITION. Now, these queries make empty parts which cover the old parts. This makes the TRUNCATE query work without a followedexclusive lock which means concurrent reads aren't locked. Also achieved durability in all those queries. If the request succeeds, then no resurrected parts appear later. Note that atomicity is achieved only with transaction scope. [#41145](https://github.com/ClickHouse/ClickHouse/pull/41145) ([Sema Checherinda](https://github.com/CheSema)).
* `SET param_x` query no longer requires manual string serialization for the value of the parameter. For example, query `SET param_a = '[\'a\', \'b\']'` can now be written like `SET param_a = ['a', 'b']`. [#41874](https://github.com/ClickHouse/ClickHouse/pull/41874) ([Nikolay Degterinsky](https://github.com/evillique)).
* Show read rows in the progress indication while reading from stdin from client. Closes [#43423](https://github.com/ClickHouse/ClickHouse/issues/43423). [#43442](https://github.com/ClickHouse/ClickHouse/pull/43442) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Show read rows in the progress indication while reading from STDIN from client. Closes [#43423](https://github.com/ClickHouse/ClickHouse/issues/43423). [#43442](https://github.com/ClickHouse/ClickHouse/pull/43442) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Show progress bar while reading from s3 table function / engine. [#43454](https://github.com/ClickHouse/ClickHouse/pull/43454) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Progress bar will show both read and written rows. [#43496](https://github.com/ClickHouse/ClickHouse/pull/43496) ([Ilya Yatsishin](https://github.com/qoega)).
* `filesystemAvailable` and related functions support one optional argument with disk name, and change `filesystemFree` to `filesystemUnreserved`. Closes [#35076](https://github.com/ClickHouse/ClickHouse/issues/35076). [#42064](https://github.com/ClickHouse/ClickHouse/pull/42064) ([flynn](https://github.com/ucasfl)).
* Integration with LDAP: increased the default value of search_limit to 256, and added LDAP server config option to change that to an arbitrary value. Closes: [#42276](https://github.com/ClickHouse/ClickHouse/issues/42276). [#42461](https://github.com/ClickHouse/ClickHouse/pull/42461) ([Vasily Nemkov](https://github.com/Enmk)).
* Allow to remove sensitive information (see the `query_masking_rules` in the configuration file) from the exception messages as well. Resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#42940](https://github.com/ClickHouse/ClickHouse/pull/42940) ([filimonov](https://github.com/filimonov)).
* Support query like `SHOW FULL TABLES ...` for MySQL compatibility. [#43910](https://github.com/ClickHouse/ClickHouse/pull/43910) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
* Allow the removal of sensitive information (see the `query_masking_rules` in the configuration file) from the exception messages as well. Resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#42940](https://github.com/ClickHouse/ClickHouse/pull/42940) ([filimonov](https://github.com/filimonov)).
* Support queries like `SHOW FULL TABLES ...` for MySQL compatibility. [#43910](https://github.com/ClickHouse/ClickHouse/pull/43910) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
* Keeper improvement: Add 4lw command `rqld` which can manually assign a node as leader. [#43026](https://github.com/ClickHouse/ClickHouse/pull/43026) ([JackyWoo](https://github.com/JackyWoo)).
* Apply connection timeouts settings for Distributed async INSERT from the query. [#43156](https://github.com/ClickHouse/ClickHouse/pull/43156) ([Azat Khuzhin](https://github.com/azat)).
* Apply connection timeout settings for Distributed async INSERT from the query. [#43156](https://github.com/ClickHouse/ClickHouse/pull/43156) ([Azat Khuzhin](https://github.com/azat)).
* The `unhex` function now supports `FixedString` arguments. [issue42369](https://github.com/ClickHouse/ClickHouse/issues/42369). [#43207](https://github.com/ClickHouse/ClickHouse/pull/43207) ([DR](https://github.com/freedomDR)).
* Priority is given to deleting completely expired parts according to the TTL rules, see [#42869](https://github.com/ClickHouse/ClickHouse/issues/42869). [#43222](https://github.com/ClickHouse/ClickHouse/pull/43222) ([zhongyuankai](https://github.com/zhongyuankai)).
* More precise and reactive CPU load indication in clickhouse-client. [#43307](https://github.com/ClickHouse/ClickHouse/pull/43307) ([Sergei Trifonov](https://github.com/serxa)).
* Support reading of subcolumns of nested types from storage `S3` and table function `s3` with formats `Parquet`, `Arrow` and `ORC`. [#43329](https://github.com/ClickHouse/ClickHouse/pull/43329) ([chen](https://github.com/xiedeyantu)).
* Add `table_uuid` column to the `system.parts` table. [#43404](https://github.com/ClickHouse/ClickHouse/pull/43404) ([Azat Khuzhin](https://github.com/azat)).
* Added client option to display the number of locally processed rows in non-interactive mode (`--print-num-processed-rows`). [#43407](https://github.com/ClickHouse/ClickHouse/pull/43407) ([jh0x](https://github.com/jh0x)).
* Implement `aggregation-in-order` optimization on top of query plan. It is enabled by default (but works only together with `optimize_aggregation_in_order`, which is disabled by default). Set `query_plan_aggregation_in_order = 0` to use previous AST-based version. [#43592](https://github.com/ClickHouse/ClickHouse/pull/43592) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Implement `aggregation-in-order` optimization on top of a query plan. It is enabled by default (but works only together with `optimize_aggregation_in_order`, which is disabled by default). Set `query_plan_aggregation_in_order = 0` to use the previous AST-based version. [#43592](https://github.com/ClickHouse/ClickHouse/pull/43592) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Allow to collect profile events with `trace_type = 'ProfileEvent'` to `system.trace_log` on each increment with current stack, profile event name and value of the increment. It can be enabled by the setting `trace_profile_events` and used to investigate performance of queries. [#43639](https://github.com/ClickHouse/ClickHouse/pull/43639) ([Anton Popov](https://github.com/CurtizJ)).
* Add a new setting `input_format_max_binary_string_size` to limit string size in RowBinary format. [#43842](https://github.com/ClickHouse/ClickHouse/pull/43842) ([Kruglov Pavel](https://github.com/Avogar)).
* When ClickHouse requests a remote HTTP server, and it returns an error, the numeric HTTP code was not displayed correctly in the exception message. Closes [#43919](https://github.com/ClickHouse/ClickHouse/issues/43919). [#43920](https://github.com/ClickHouse/ClickHouse/pull/43920) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
@ -82,50 +84,50 @@
#### Build/Testing/Packaging Improvement
* Systemd integration now correctly notifies systemd that service is really started and is ready to server requests. [#43400](https://github.com/ClickHouse/ClickHouse/pull/43400) ([Коренберг Марк](https://github.com/socketpair)).
* If someone wants, they can build ClickHouse with OpenSSL instead of BoringSSL, and even use dynamic library. This type of build is unsupported and not recommended anyhow. It is not tested and therefore not secure. The use-case is to supply the FIPS 140-2 certified build of OpenSSL. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)).
* This is to upgrade the new `DeflateQpl` compression codec which has been implemented on previous PR (details: https://github.com/ClickHouse/ClickHouse/pull/39494). This patch improves codec on below aspects: 1. QPL v0.2.0 to QPL v0.3.0 [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl) 2. Improve CMake file for fixing QPL build issues for QPL v0.3.0. 3. Link the QPL library with libaccel-config at build time instead of runtime loading on QPL v0.2.0 (dlopen) 4. Fixed log print issue in CompressionCodecDeflateQpl.cpp. [#44024](https://github.com/ClickHouse/ClickHouse/pull/44024) ([jasperzhu](https://github.com/jinjunzh)).
* Systemd integration now correctly notifies systemd that the service is really started and is ready to serve requests. [#43400](https://github.com/ClickHouse/ClickHouse/pull/43400) ([Коренберг Марк](https://github.com/socketpair)).
* Added the option to build ClickHouse with OpenSSL using the [OpenSSL FIPS Module](https://www.openssl.org/docs/man3.0/man7/fips_module.html). This build type has not been tested to validate security and is not supported. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)).
* Upgrade to the new `DeflateQpl` compression codec which has been implemented in a previous PR (details: https://github.com/ClickHouse/ClickHouse/pull/39494). This patch improves codec on below aspects: 1. QPL v0.2.0 to QPL v0.3.0 [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl) 2. Improve CMake file for fixing QPL build issues for QPL v0.3.0. 3. Link the QPL library with libaccel-config at build time instead of runtime loading on QPL v0.2.0 (dlopen) 4. Fixed log print issue in CompressionCodecDeflateQpl.cpp. [#44024](https://github.com/ClickHouse/ClickHouse/pull/44024) ([jasperzhu](https://github.com/jinjunzh)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Fixed bug which could lead to deadlock while using asynchronous inserts. [#43233](https://github.com/ClickHouse/ClickHouse/pull/43233) ([Anton Popov](https://github.com/CurtizJ)).
* Fix some incorrect logic in AST level optimization `optimize_normalize_count_variants`. [#43873](https://github.com/ClickHouse/ClickHouse/pull/43873) ([Duc Canh Le](https://github.com/canhld94)).
* Fix a case when mutations not making progress when checksums do not match between replicas (e.g. caused by a change in data format on an upgrade). [#36877](https://github.com/ClickHouse/ClickHouse/pull/36877) ([nvartolomei](https://github.com/nvartolomei)).
* Fix a case when mutations are not making progress when checksums do not match between replicas (e.g. caused by a change in data format on an upgrade). [#36877](https://github.com/ClickHouse/ClickHouse/pull/36877) ([nvartolomei](https://github.com/nvartolomei)).
* Fix the `skip_unavailable_shards` optimization which did not work with the `hdfsCluster` table function. [#43236](https://github.com/ClickHouse/ClickHouse/pull/43236) ([chen](https://github.com/xiedeyantu)).
* Fix `s3` support for the `?` wildcard. Closes [#42731](https://github.com/ClickHouse/ClickHouse/issues/42731). [#43253](https://github.com/ClickHouse/ClickHouse/pull/43253) ([chen](https://github.com/xiedeyantu)).
* Fix functions `arrayFirstOrNull` and `arrayLastOrNull` or null when array contains `Nullable` elements. [#43274](https://github.com/ClickHouse/ClickHouse/pull/43274) ([Duc Canh Le](https://github.com/canhld94)).
* Fix functions `arrayFirstOrNull` and `arrayLastOrNull` or null when the array contains `Nullable` elements. [#43274](https://github.com/ClickHouse/ClickHouse/pull/43274) ([Duc Canh Le](https://github.com/canhld94)).
* Fix incorrect `UserTimeMicroseconds`/`SystemTimeMicroseconds` accounting related to Kafka tables. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)).
* Do not suppress exceptions in `web` disks. Fix retries for the `web` disk. [#42800](https://github.com/ClickHouse/ClickHouse/pull/42800) ([Azat Khuzhin](https://github.com/azat)).
* Fixed (logical) race condition between inserts and dropping materialized views. A race condition happened when a Materialized View was dropped at the same time as an INSERT, where the MVs was present as a dependency of the insert at the beggining of the execution, but the table has been dropped by the time the insert chain tries to access to it, producing either an `UNKNOWN_TABLE` or `TABLE_IS_DROPPED` exception, and stopping the insertion. After this change we avoid these exceptions and just continue with the insert if the dependency is gone. [#43161](https://github.com/ClickHouse/ClickHouse/pull/43161) ([AlfVII](https://github.com/AlfVII)).
* Fixed (logical) race condition between inserts and dropping materialized views. A race condition happened when a Materialized View was dropped at the same time as an INSERT, where the MVs were present as a dependency of the insert at the begining of the execution, but the table has been dropped by the time the insert chain tries to access it, producing either an `UNKNOWN_TABLE` or `TABLE_IS_DROPPED` exception, and stopping the insertion. After this change, we avoid these exceptions and just continue with the insert if the dependency is gone. [#43161](https://github.com/ClickHouse/ClickHouse/pull/43161) ([AlfVII](https://github.com/AlfVII)).
* Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Additional check on zero uncompressed size is added to `CompressionCodecDelta`. [#43255](https://github.com/ClickHouse/ClickHouse/pull/43255) ([Nikita Taranov](https://github.com/nickitat)).
* Flatten arrays from Parquet to avoid an issue with inconsistent data in arrays. These incorrect files can be generated by Apache Iceberg. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix bad cast from `LowCardinality` column when using short circuit function execution. [#43311](https://github.com/ClickHouse/ClickHouse/pull/43311) ([Kruglov Pavel](https://github.com/Avogar)).
* Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)).
* Check and compare the content of the `format_version` file in `MergeTreeData` so tables can be loaded even if the storage policy was changed. [#43328](https://github.com/ClickHouse/ClickHouse/pull/43328) ([Antonio Andelic](https://github.com/antonio2368)).
* Check and compare the content of the `format_version` file in `MergeTreeData` so that tables can be loaded even if the storage policy was changed. [#43328](https://github.com/ClickHouse/ClickHouse/pull/43328) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix possible (very unlikely) "No column to rollback" logical error during INSERT into `Buffer` tables. [#43336](https://github.com/ClickHouse/ClickHouse/pull/43336) ([Azat Khuzhin](https://github.com/azat)).
* Fix a bug that allowed the parser to parse an unlimited amount of round brackets into one function if `allow_function_parameters` is set. [#43350](https://github.com/ClickHouse/ClickHouse/pull/43350) ([Nikolay Degterinsky](https://github.com/evillique)).
* `MaterializeMySQL` (experimental feature) support DDL `drop table t1, t2` and compatible with most of MySQL DROP DDL. [#43366](https://github.com/ClickHouse/ClickHouse/pull/43366) ([zzsmdfj](https://github.com/zzsmdfj)).
* `session_log` (experimental feature): Fixed the unability to log in (because of failure to create the session_log entry) in a very rare case of messed up setting profiles. [#42641](https://github.com/ClickHouse/ClickHouse/pull/42641) ([Vasily Nemkov](https://github.com/Enmk)).
* `session_log` (experimental feature): Fixed the inability to log in (because of failure to create the session_log entry) in a very rare case of messed up setting profiles. [#42641](https://github.com/ClickHouse/ClickHouse/pull/42641) ([Vasily Nemkov](https://github.com/Enmk)).
* Fix possible `Cannot create non-empty column with type Nothing` in functions `if`/`multiIf`. Closes [#43356](https://github.com/ClickHouse/ClickHouse/issues/43356). [#43368](https://github.com/ClickHouse/ClickHouse/pull/43368) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix a bug when a row level filter uses default value of column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)).
* Fix a bug when a row level filter uses the default value of a column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)).
* Query with `DISTINCT` + `LIMIT BY` + `LIMIT` can return fewer rows than expected. Fixes [#43377](https://github.com/ClickHouse/ClickHouse/issues/43377). [#43410](https://github.com/ClickHouse/ClickHouse/pull/43410) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix `sumMap` for `Nullable(Decimal(...))`. [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)).
* Fix `date_diff` for hour/minute on macOS. Close [#42742](https://github.com/ClickHouse/ClickHouse/issues/42742). [#43466](https://github.com/ClickHouse/ClickHouse/pull/43466) ([zzsmdfj](https://github.com/zzsmdfj)).
* Fix incorrect memory accounting because of merges/mutations. [#43516](https://github.com/ClickHouse/ClickHouse/pull/43516) ([Azat Khuzhin](https://github.com/azat)).
* Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)). This error has been found by @tisonkun.
* Ensure consistency when `clickhouse-copier` update status and `attach_is_done` in keeper after partition attach is done. [#43602](https://github.com/ClickHouse/ClickHouse/pull/43602) ([lzydmxy](https://github.com/lzydmxy)).
* During recovering of the lost replica of a `Replicated` database (experimental feature) there could a situation where we need to atomically swap two table names (use EXCHANGE), but instead previously we tried to use two RENAME queries. Which was obviously failed and moreover failed the whole recovery process of the database replica. [#43628](https://github.com/ClickHouse/ClickHouse/pull/43628) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix the case when `s3Cluster` function throws `NOT_FOUND_COLUMN_IN_BLOCK` error. Closes [#43534](https://github.com/ClickHouse/ClickHouse/issues/43534). [#43629](https://github.com/ClickHouse/ClickHouse/pull/43629) ([chen](https://github.com/xiedeyantu)).
* Fix posssible logical error `Array sizes mismatched` while parsing JSON object with arrays with same key names but with different nesting level. Closes [#43569](https://github.com/ClickHouse/ClickHouse/issues/43569). [#43693](https://github.com/ClickHouse/ClickHouse/pull/43693) ([Kruglov Pavel](https://github.com/Avogar)).
* Fixed possible exception in case of distributed `GROUP BY` with an `ALIAS` column among aggregation keys. [#43709](https://github.com/ClickHouse/ClickHouse/pull/43709) ([Nikita Taranov](https://github.com/nickitat)).
* Ensure consistency when `clickhouse-copier` updates status and `attach_is_done` in Keeper after partition attach is done. [#43602](https://github.com/ClickHouse/ClickHouse/pull/43602) ([lzydmxy](https://github.com/lzydmxy)).
* During the recovery of a lost replica of a `Replicated` database (experimental feature), there could a situation where we need to atomically swap two table names (use EXCHANGE). Previously we tried to use two RENAME queries, which was obviously failing and moreover, failed the whole recovery process of the database replica. [#43628](https://github.com/ClickHouse/ClickHouse/pull/43628) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix the case when the `s3Cluster` function throws `NOT_FOUND_COLUMN_IN_BLOCK` error. Closes [#43534](https://github.com/ClickHouse/ClickHouse/issues/43534). [#43629](https://github.com/ClickHouse/ClickHouse/pull/43629) ([chen](https://github.com/xiedeyantu)).
* Fix possible logical error `Array sizes mismatched` while parsing JSON object with arrays with same key names but with different nesting level. Closes [#43569](https://github.com/ClickHouse/ClickHouse/issues/43569). [#43693](https://github.com/ClickHouse/ClickHouse/pull/43693) ([Kruglov Pavel](https://github.com/Avogar)).
* Fixed possible exception in the case of distributed `GROUP BY` with an `ALIAS` column among aggregation keys. [#43709](https://github.com/ClickHouse/ClickHouse/pull/43709) ([Nikita Taranov](https://github.com/nickitat)).
* Fix bug which can lead to broken projections if zero-copy replication (experimental feature) is enabled and used. [#43764](https://github.com/ClickHouse/ClickHouse/pull/43764) ([alesapin](https://github.com/alesapin)).
* Fix using multipart upload for very large S3 objects in AWS S3. [#43824](https://github.com/ClickHouse/ClickHouse/pull/43824) ([ianton-ru](https://github.com/ianton-ru)).
* Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)).
* Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could have been applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)).
* Fix a logical error in JOIN with `Join` table engine at right hand side, if `USING` is being used. [#43963](https://github.com/ClickHouse/ClickHouse/pull/43963) ([Vladimir C](https://github.com/vdimir)). Fix a bug with wrong order of keys in `Join` table engine. [#44012](https://github.com/ClickHouse/ClickHouse/pull/44012) ([Vladimir C](https://github.com/vdimir)).
* Keeper fix: throw if interserver port for Raft is already in use. [#43984](https://github.com/ClickHouse/ClickHouse/pull/43984) ([Antonio Andelic](https://github.com/antonio2368)).
* Keeper fix: throw if the interserver port for Raft is already in use. [#43984](https://github.com/ClickHouse/ClickHouse/pull/43984) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix ORDER BY positional argument (example: `ORDER BY 1, 2`) in case of unneeded columns pruning from subqueries. Closes [#43964](https://github.com/ClickHouse/ClickHouse/issues/43964). [#43987](https://github.com/ClickHouse/ClickHouse/pull/43987) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixed exception when a subquery contains HAVING but doesn't contain actual aggregation. [#44051](https://github.com/ClickHouse/ClickHouse/pull/44051) ([Nikita Taranov](https://github.com/nickitat)).
* Fixed exception when a subquery contains HAVING but doesn't contain an actual aggregation. [#44051](https://github.com/ClickHouse/ClickHouse/pull/44051) ([Nikita Taranov](https://github.com/nickitat)).
* Fix race in s3 multipart upload. This race could cause the error `Part number must be an integer between 1 and 10000, inclusive. (S3_ERROR)` while restoring from a backup. [#44065](https://github.com/ClickHouse/ClickHouse/pull/44065) ([Vitaly Baranov](https://github.com/vitlibar)).
@ -651,30 +653,30 @@
* Add counters (ProfileEvents) for cases when query complexity limitation has been set and has reached (a separate counter for `overflow_mode` = `break` and `throw`). For example, if you have set up `max_rows_to_read` with `read_overflow_mode = 'break'`, looking at the value of `OverflowBreak` counter will allow distinguishing incomplete results. [#40205](https://github.com/ClickHouse/ClickHouse/pull/40205) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix memory accounting in case of "Memory limit exceeded" errors (previously [peak] memory usage was takes failed allocations into account). [#40249](https://github.com/ClickHouse/ClickHouse/pull/40249) ([Azat Khuzhin](https://github.com/azat)).
* Add metrics for filesystem cache: `FilesystemCacheSize` and `FilesystemCacheElements`. [#40260](https://github.com/ClickHouse/ClickHouse/pull/40260) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Support hadoop secure RPC transfer (hadoop.rpc.protection=privacy and hadoop.rpc.protection=integrity). [#39411](https://github.com/ClickHouse/ClickHouse/pull/39411) ([michael1589](https://github.com/michael1589)).
* Support Hadoop secure RPC transfer (hadoop.rpc.protection=privacy and hadoop.rpc.protection=integrity). [#39411](https://github.com/ClickHouse/ClickHouse/pull/39411) ([michael1589](https://github.com/michael1589)).
* Avoid continuously growing memory consumption of pattern cache when using functions multi(Fuzzy)Match(Any|AllIndices|AnyIndex)(). [#40264](https://github.com/ClickHouse/ClickHouse/pull/40264) ([Robert Schulze](https://github.com/rschu1ze)).
* Add cache for schema inference for file/s3/hdfs/url table functions. Now, schema inference will be performed only on the first query to the file, all subsequent queries to the same file will use the schema from cache if data wasn't changed. Add system table system.schema_inference_cache with all current schemas in cache and system queries SYSTEM DROP SCHEMA CACHE [FOR FILE/S3/HDFS/URL] to drop schemas from cache. [#38286](https://github.com/ClickHouse/ClickHouse/pull/38286) ([Kruglov Pavel](https://github.com/Avogar)).
* Add cache for schema inference for file/s3/hdfs/url table functions. Now, schema inference will be performed only on the first query to the file, all subsequent queries to the same file will use the schema from the cache if data has not changed. Add system table system.schema_inference_cache with all current schemas in cache and system queries SYSTEM DROP SCHEMA CACHE [FOR FILE/S3/HDFS/URL] to drop schemas from cache. [#38286](https://github.com/ClickHouse/ClickHouse/pull/38286) ([Kruglov Pavel](https://github.com/Avogar)).
* Add support for LARGE_BINARY/LARGE_STRING with Arrow (Closes [#32401](https://github.com/ClickHouse/ClickHouse/issues/32401)). [#40293](https://github.com/ClickHouse/ClickHouse/pull/40293) ([Josh Taylor](https://github.com/joshuataylor)).
#### Build/Testing/Packaging Improvement
* [ClickFiddle](https://fiddle.clickhouse.com/): A new tool for testing ClickHouse versions in read/write mode (**Igor Baliuk**).
* ClickHouse binary is made self-extracting [#35775](https://github.com/ClickHouse/ClickHouse/pull/35775) ([Yakov Olkhovskiy, Arthur Filatenkov](https://github.com/yakov-olkhovskiy)).
* Update tzdata to 2022b to support the new timezone changes. See https://github.com/google/cctz/pull/226. Chile's 2022 DST start is delayed from September 4 to September 11. Iran plans to stop observing DST permanently, after it falls back on 2022-09-21. There are corrections of the historical time zone of Asia/Tehran in the year 1977: Iran adopted standard time in 1935, not 1946. In 1977 it observed DST from 03-21 23:00 to 10-20 24:00; its 1978 transitions were on 03-24 and 08-05, not 03-20 and 10-20; and its spring 1979 transition was on 05-27, not 03-21 (https://data.iana.org/time-zones/tzdb/NEWS). ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Former packages used to install systemd.service file to `/etc`. The files there are marked as `conf` and are not cleaned out, and not updated automatically. This PR cleans them out. [#39323](https://github.com/ClickHouse/ClickHouse/pull/39323) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update `tzdata` to 2022b to support the new timezone changes. See https://github.com/google/cctz/pull/226. Chile's 2022 DST start is delayed from September 4 to September 11. Iran plans to stop observing DST permanently after it falls back on 2022-09-21. There are corrections to the historical time zone of Asia/Tehran in the year 1977: Iran adopted standard time in 1935, not 1946. In 1977 it observed DST from 03-21 23:00 to 10-20 24:00; its 1978 transitions were on 03-24 and 08-05, not 03-20 and 10-20; and its spring 1979 transition was on 05-27, not 03-21 (https://data.iana.org/time-zones/tzdb/NEWS). ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Former packages used to install systemd.service file to `/etc`. The files there are marked as `conf` and are not cleaned out, and are not updated automatically. This PR cleans them out. [#39323](https://github.com/ClickHouse/ClickHouse/pull/39323) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Ensure LSan is effective. [#39430](https://github.com/ClickHouse/ClickHouse/pull/39430) ([Azat Khuzhin](https://github.com/azat)).
* TSAN has issues with clang-14 (https://github.com/google/sanitizers/issues/1552, https://github.com/google/sanitizers/issues/1540), so here we build the TSAN binaries with clang-15. [#39450](https://github.com/ClickHouse/ClickHouse/pull/39450) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Remove the option to build ClickHouse tools as separate executable programs. This fixes [#37847](https://github.com/ClickHouse/ClickHouse/issues/37847). [#39520](https://github.com/ClickHouse/ClickHouse/pull/39520) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Small preparations for build on s390x (which is big-endian). [#39627](https://github.com/ClickHouse/ClickHouse/pull/39627) ([Harry Lee](https://github.com/HarryLeeIBM)). [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issue in BitHelpers for s390x. [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Implement a piece of code related to SipHash for s390x architecture (which is not supported by ClickHouse). [#39732](https://github.com/ClickHouse/ClickHouse/pull/39732) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed an Endian issue in Coordination snapshot code for s390x architecture (which is not supported by ClickHouse). [#39931](https://github.com/ClickHouse/ClickHouse/pull/39931) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in Codec code for s390x architecture (which is not supported by ClickHouse). [#40008](https://github.com/ClickHouse/ClickHouse/pull/40008) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in reading/writing BigEndian binary data in ReadHelpers and WriteHelpers code for s390x architecture (which is not supported by ClickHouse). [#40179](https://github.com/ClickHouse/ClickHouse/pull/40179) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Small preparations for build on s390x (which is big-endian). [#39627](https://github.com/ClickHouse/ClickHouse/pull/39627) ([Harry Lee](https://github.com/HarryLeeIBM)). [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issue in BitHelpers for s390x. [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Implement a piece of code related to SipHash for s390x architecture (which is not supported by ClickHouse). [#39732](https://github.com/ClickHouse/ClickHouse/pull/39732) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed an Endian issue in the Coordination snapshot code for s390x architecture (which is not supported by ClickHouse). [#39931](https://github.com/ClickHouse/ClickHouse/pull/39931) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in Codec code for s390x architecture (which is not supported by ClickHouse). [#40008](https://github.com/ClickHouse/ClickHouse/pull/40008) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in reading/writing BigEndian binary data in ReadHelpers and WriteHelpers code for s390x architecture (which is not supported by ClickHouse). [#40179](https://github.com/ClickHouse/ClickHouse/pull/40179) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Support build with `clang-16` (trunk). This closes [#39949](https://github.com/ClickHouse/ClickHouse/issues/39949). [#40181](https://github.com/ClickHouse/ClickHouse/pull/40181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Prepare RISC-V 64 build to run in CI. This is for [#40141](https://github.com/ClickHouse/ClickHouse/issues/40141). [#40197](https://github.com/ClickHouse/ClickHouse/pull/40197) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Simplified function registration macro interface (`FUNCTION_REGISTER*`) to eliminate the step to add and call an extern function in the registerFunctions.cpp, it also makes incremental builds of a new function faster. [#38615](https://github.com/ClickHouse/ClickHouse/pull/38615) ([Li Yin](https://github.com/liyinsg)).
* Docker: Now entrypoint.sh in docker image creates and executes chown for all folders it found in config for multidisk setup [#17717](https://github.com/ClickHouse/ClickHouse/issues/17717). [#39121](https://github.com/ClickHouse/ClickHouse/pull/39121) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Docker: Now entrypoint.sh in docker image creates and executes chown for all folders it finds in the config for multidisk setup [#17717](https://github.com/ClickHouse/ClickHouse/issues/17717). [#39121](https://github.com/ClickHouse/ClickHouse/pull/39121) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
#### Bug Fix
* Fix possible segfault in `CapnProto` input format. This bug was found and send through ClickHouse bug-bounty [program](https://github.com/ClickHouse/ClickHouse/issues/38986) by *kiojj*. [#40241](https://github.com/ClickHouse/ClickHouse/pull/40241) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix a very rare case of incorrect behavior of array subscript operator. This closes [#28720](https://github.com/ClickHouse/ClickHouse/issues/28720). [#40185](https://github.com/ClickHouse/ClickHouse/pull/40185) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix possible segfault in `CapnProto` input format. This bug was found and sent in through the ClickHouse bug-bounty [program](https://github.com/ClickHouse/ClickHouse/issues/38986) by *kiojj*. [#40241](https://github.com/ClickHouse/ClickHouse/pull/40241) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix a very rare case of incorrect behavior of the array subscript operator. This closes [#28720](https://github.com/ClickHouse/ClickHouse/issues/28720). [#40185](https://github.com/ClickHouse/ClickHouse/pull/40185) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix insufficient argument check for encryption functions (found by query fuzzer). This closes [#39987](https://github.com/ClickHouse/ClickHouse/issues/39987). [#40194](https://github.com/ClickHouse/ClickHouse/pull/40194) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix the case when the order of columns can be incorrect if the `IN` operator is used with a table with `ENGINE = Set` containing multiple columns. This fixes [#13014](https://github.com/ClickHouse/ClickHouse/issues/13014). [#40225](https://github.com/ClickHouse/ClickHouse/pull/40225) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix seeking while reading from encrypted disk. This PR fixes [#38381](https://github.com/ClickHouse/ClickHouse/issues/38381). [#39687](https://github.com/ClickHouse/ClickHouse/pull/39687) ([Vitaly Baranov](https://github.com/vitlibar)).

View File

@ -16,6 +16,6 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
## Upcoming events
* [**v22.12 Release Webinar**](https://clickhouse.com/company/events/v22-12-release-webinar) 22.12 is the ClickHouse Christmas release. There are plenty of gifts (a new JOIN algorithm among them) and we adopted something from MongoDB. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* **Recording available**: [**v22.12 Release Webinar**](https://www.youtube.com/watch?v=sREupr6uc2k) 22.12 is the ClickHouse Christmas release. There are plenty of gifts (a new JOIN algorithm among them) and we adopted something from MongoDB. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup at the CHEQ office in Tel Aviv**](https://www.meetup.com/clickhouse-tel-aviv-user-group/events/289599423/) - Jan 16 - We are very excited to be holding our next in-person ClickHouse meetup at the CHEQ office in Tel Aviv! Hear from CHEQ, ServiceNow and Contentsquare, as well as a deep dive presentation from ClickHouse CTO Alexey Milovidov. Join us for a fun evening of talks, food and discussion!
* [**ClickHouse Meetup at Microsoft Office in Seattle**](https://www.meetup.com/clickhouse-seattle-user-group/events/290310025/) - Jan 18 - Keep an eye on this space as we will be announcing speakers soon!

View File

@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s
| Version | Supported |
|:-|:-|
| 22.12 | ✔️ |
| 22.11 | ✔️ |
| 22.10 | ✔️ |
| 22.9 | ✔️ |
| 22.9 | |
| 22.8 | ✔️ |
| 22.7 | ❌ |
| 22.6 | ❌ |

View File

@ -402,7 +402,15 @@ ReplxxLineReader::ReplxxLineReader(
words.push_back(hs.get().text());
}
std::string new_query(skim(words));
std::string new_query;
try
{
new_query = std::string(skim(words));
}
catch (const std::exception & e)
{
rx.print("skim failed: %s (consider using Ctrl-T for a regular non-fuzzy reverse search)\n", e.what());
}
if (!new_query.empty())
rx.set_state(replxx::Replxx::State(new_query.c_str(), static_cast<int>(new_query.size())));
@ -413,8 +421,18 @@ ReplxxLineReader::ReplxxLineReader(
return rx.invoke(Replxx::ACTION::REPAINT, code);
};
/// NOTE: You can use Ctrl-S for non-fuzzy complete.
rx.bind_key(Replxx::KEY::control('R'), interactive_history_search);
/// Rebind regular incremental search to C-T.
///
/// NOTE: C-T by default this is a binding to swap adjustent chars
/// (TRANSPOSE_CHARACTERS), but for SQL it sounds pretty useless.
rx.bind_key(Replxx::KEY::control('T'), [this](char32_t)
{
/// Reverse search is detected by C-R.
uint32_t reverse_search = Replxx::KEY::control('R');
return rx.invoke(Replxx::ACTION::HISTORY_INCREMENTAL_SEARCH, reverse_search);
});
#endif
}

View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54469)
SET(VERSION_REVISION 54470)
SET(VERSION_MAJOR 22)
SET(VERSION_MINOR 12)
SET(VERSION_MINOR 13)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 0d211ed19849fe44b0e43fdebe2c15d76d560a77)
SET(VERSION_DESCRIBE v22.12.1.1-testing)
SET(VERSION_STRING 22.12.1.1)
SET(VERSION_GITHASH 688e488e930c83eefeac4f87c4cc029cc5b231e3)
SET(VERSION_DESCRIBE v22.13.1.1-testing)
SET(VERSION_STRING 22.13.1.1)
# end of autochange

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_SYSTEM_NAME "Darwin")
set (CMAKE_SYSTEM_PROCESSOR "aarch64")
set (CMAKE_C_COMPILER_TARGET "aarch64-apple-darwin")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_SYSTEM_NAME "Darwin")
set (CMAKE_SYSTEM_PROCESSOR "x86_64")
set (CMAKE_C_COMPILER_TARGET "x86_64-apple-darwin")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_SYSTEM_NAME "FreeBSD")
set (CMAKE_SYSTEM_PROCESSOR "aarch64")
set (CMAKE_C_COMPILER_TARGET "aarch64-unknown-freebsd12")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_SYSTEM_NAME "FreeBSD")
set (CMAKE_SYSTEM_PROCESSOR "ppc64le")
set (CMAKE_C_COMPILER_TARGET "powerpc64le-unknown-freebsd13")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_SYSTEM_NAME "FreeBSD")
set (CMAKE_SYSTEM_PROCESSOR "x86_64")
set (CMAKE_C_COMPILER_TARGET "x86_64-pc-freebsd11")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
set (CMAKE_SYSTEM_NAME "Linux")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
set (CMAKE_SYSTEM_NAME "Linux")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
set (CMAKE_SYSTEM_NAME "Linux")

View File

@ -1,3 +1,6 @@
# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file.
include_guard(GLOBAL)
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
set (CMAKE_SYSTEM_NAME "Linux")

View File

@ -1,18 +1,15 @@
if (_CLICKHOUSE_TOOLCHAIN_FILE_LOADED)
# During first run of cmake the toolchain file will be loaded twice,
# - /usr/share/cmake-3.23/Modules/CMakeDetermineSystem.cmake
# - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake
#
# But once you already have non-empty cmake cache it will be loaded only
# once:
# - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake
#
# This has no harm except for double load of toolchain will add
# --gcc-toolchain multiple times that will not allow ccache to reuse the
# cache.
return()
endif()
set (_CLICKHOUSE_TOOLCHAIN_FILE_LOADED ON)
# During first run of cmake the toolchain file will be loaded twice,
# - /usr/share/cmake-3.23/Modules/CMakeDetermineSystem.cmake
# - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake
#
# But once you already have non-empty cmake cache it will be loaded only
# once:
# - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake
#
# This has no harm except for double load of toolchain will add
# --gcc-toolchain multiple times that will not allow ccache to reuse the
# cache.
include_guard(GLOBAL)
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

View File

@ -65,7 +65,7 @@ add_contrib (dragonbox-cmake dragonbox)
add_contrib (vectorscan-cmake vectorscan)
add_contrib (jemalloc-cmake jemalloc)
add_contrib (libcpuid-cmake libcpuid)
add_contrib (libdivide)
add_contrib (libdivide-cmake)
add_contrib (libmetrohash)
add_contrib (lz4-cmake lz4)
add_contrib (murmurhash)

View File

@ -39,5 +39,7 @@ endif()
message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}")
# FindRust.cmake
list(APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake")
# Define function corrosion_import_crate()
include ("${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake/Corrosion.cmake")

1
contrib/libdivide vendored Submodule

@ -0,0 +1 @@
Subproject commit 3bd34388573681ce563348cdf04fe15d24770d04

View File

@ -0,0 +1,7 @@
set(LIBDIVIDE_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libdivide")
add_library (_libdivide INTERFACE)
# for libdivide.h
target_include_directories (_libdivide SYSTEM BEFORE INTERFACE ${LIBDIVIDE_SOURCE_DIR})
# for libdivide-config.h
target_include_directories (_libdivide SYSTEM BEFORE INTERFACE .)
add_library (ch_contrib::libdivide ALIAS _libdivide)

View File

@ -0,0 +1,9 @@
#if defined(__SSE2__)
# define LIBDIVIDE_SSE2
#elif defined(__AVX512F__) || defined(__AVX512BW__) || defined(__AVX512VL__)
# define LIBDIVIDE_AVX512
#elif defined(__AVX2__)
# define LIBDIVIDE_AVX2
#elif defined(__aarch64__) && defined(__ARM_NEON)
# define LIBDIVIDE_NEON
#endif

View File

@ -1,3 +0,0 @@
add_library (_libdivide INTERFACE)
target_include_directories (_libdivide SYSTEM BEFORE INTERFACE .)
add_library (ch_contrib::libdivide ALIAS _libdivide)

View File

@ -1,20 +0,0 @@
libdivide
Copyright (C) 2010 ridiculous_fish
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
libdivide@ridiculousfish.com

View File

@ -1,2 +0,0 @@
https://github.com/ridiculousfish/libdivide
http://libdivide.com/

File diff suppressed because it is too large Load Diff

View File

@ -131,7 +131,7 @@ def parse_env_variables(
ARM_V80COMPAT_SUFFIX = "-aarch64-v80compat"
FREEBSD_SUFFIX = "-freebsd"
PPC_SUFFIX = "-ppc64le"
AMD64_SSE2_SUFFIX = "-amd64sse2"
AMD64_COMPAT_SUFFIX = "-amd64-compat"
result = []
result.append("OUTPUT_DIR=/output")
@ -144,7 +144,7 @@ def parse_env_variables(
is_cross_arm_v80compat = compiler.endswith(ARM_V80COMPAT_SUFFIX)
is_cross_ppc = compiler.endswith(PPC_SUFFIX)
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
is_amd64_sse2 = compiler.endswith(AMD64_SSE2_SUFFIX)
is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX)
if is_cross_darwin:
cc = compiler[: -len(DARWIN_SUFFIX)]
@ -197,8 +197,8 @@ def parse_env_variables(
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake"
)
elif is_amd64_sse2:
cc = compiler[: -len(AMD64_SSE2_SUFFIX)]
elif is_amd64_compat:
cc = compiler[: -len(AMD64_COMPAT_SUFFIX)]
result.append("DEB_ARCH=amd64")
cmake_flags.append("-DNO_SSE3_OR_HIGHER=1")
else:
@ -358,7 +358,7 @@ if __name__ == "__main__":
"clang-15-aarch64",
"clang-15-aarch64-v80compat",
"clang-15-ppc64le",
"clang-15-amd64sse2",
"clang-15-amd64-compat",
"clang-15-freebsd",
"gcc-11",
),

View File

@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="22.11.2.30"
ARG VERSION="22.12.1.1752"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="22.11.2.30"
ARG VERSION="22.12.1.1752"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -116,6 +116,7 @@ function clone_submodules
contrib/base64
contrib/cctz
contrib/libcpuid
contrib/libdivide
contrib/double-conversion
contrib/llvm-project
contrib/lz4

View File

@ -12,6 +12,10 @@ echo '{
"registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
}' | dd of=/etc/docker/daemon.json 2>/dev/null
# In case of test hung it is convenient to use pytest --pdb to debug it,
# and on hung you can simply press Ctrl-C and it will spawn a python pdb,
# but on SIGINT dockerd will exit, so ignore it to preserve the daemon.
trap '' INT
dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log &
set +e

View File

@ -17,6 +17,7 @@ ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
ENV DATASETS="hits visits"
RUN npm install -g azurite
RUN npm install tslib
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]

View File

@ -80,6 +80,7 @@ ENV MINIO_ROOT_PASSWORD="clickhouse"
ENV EXPORT_S3_STORAGE_POLICIES=1
RUN npm install -g azurite
RUN npm install tslib
COPY run.sh /
COPY setup_minio.sh /

View File

@ -127,23 +127,24 @@ EOL
function stop()
{
local max_tries=""
if [ -n "$1" ]
then
max_tries="--max-tries $1"
fi
local pid
# Preserve the pid, since the server can hung after the PID will be deleted.
pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
clickhouse stop $max_tries --do-not-kill && return
if [ -n "$1" ]
then
# temporarily disable it in BC check
clickhouse stop --force
return
fi
# We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces.
kill -TERM "$(pidof gdb)" ||:
sleep 5
echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
clickhouse stop --force
}
@ -431,7 +432,7 @@ else
clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables"
stop 180
stop 1
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log
# Start new server

View File

@ -19,6 +19,7 @@ def process_result(result_folder):
"typos",
"whitespaces",
"workflows",
"submodules",
"docs spelling",
)

View File

@ -19,6 +19,8 @@ echo "Check whitespaces" | ts
./check-whitespaces -n |& tee /test_output/whitespaces_output.txt
echo "Check workflows" | ts
./check-workflows |& tee /test_output/workflows_output.txt
echo "Check submodules" | ts
./check-submodules |& tee /test_output/submodules_output.txt
echo "Check shell scripts with shellcheck" | ts
./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt
/process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv

View File

@ -10,6 +10,10 @@ cat > /etc/docker/daemon.json << EOF
}
EOF
# In case of test hung it is convenient to use pytest --pdb to debug it,
# and on hung you can simply press Ctrl-C and it will spawn a python pdb,
# but on SIGINT dockerd will exit, so ignore it to preserve the daemon.
trap '' INT
dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 &>/var/log/somefile &
set +e

View File

@ -8,15 +8,23 @@ DIR=
if [ "${OS}" = "Linux" ]
then
if [ "${ARCH}" = "x86_64" -o "${ARCH}" = "amd64" ]
then
# Require at least x86-64 + SSE4.2 (introduced in 2006). On older hardware fall back to plain x86-64 (introduced in 1999) which
# guarantees at least SSE2. The caveat is that plain x86-64 builds are much less tested than SSE 4.2 builds.
HAS_SSE42=$(grep sse4_2 /proc/cpuinfo)
if [ "${HAS_SSE42}" ]
then
DIR="amd64"
else
DIR="amd64compat"
fi
elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ]
then
# If the system has >=ARMv8.2 (https://en.wikipedia.org/wiki/AArch64), choose the corresponding build, else fall back to a v8.0
# compat build. Unfortunately, the ARM ISA level cannot be read directly, we need to guess from the "features" in /proc/cpuinfo.
# Also, the flags in /proc/cpuinfo are named differently than the flags passed to the compiler (cmake/cpu_features.cmake).
ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/ && /lrcpc/')
if [ "${ARMV82}" ]
HAS_ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/ && /lrcpc/')
if [ "${HAS_ARMV82}" ]
then
DIR="aarch64"
else

View File

@ -0,0 +1,320 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.12.1.1752-stable (688e488e930) FIXME as compared to v22.11.1.1360-stable (0d211ed1984)
#### Backward Incompatible Change
* Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/41431 and affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Raúl Marín](https://github.com/Algunenano)).
#### New Feature
* Add "grace_hash" join_algorithm. [#38191](https://github.com/ClickHouse/ClickHouse/pull/38191) ([BigRedEye](https://github.com/BigRedEye)).
* Merging on initiator now uses the same memory bound approach as merging of local aggregation results if `enable_memory_bound_merging_of_aggregation_results` is set. [#40879](https://github.com/ClickHouse/ClickHouse/pull/40879) ([Nikita Taranov](https://github.com/nickitat)).
* Add BSONEachRow input/output format. In this format, ClickHouse formats/parses each row as a separated BSON Document and each column is formatted/parsed as a single BSON field with column name as a key. [#42033](https://github.com/ClickHouse/ClickHouse/pull/42033) ([mark-polokhov](https://github.com/mark-polokhov)).
* close: [#37631](https://github.com/ClickHouse/ClickHouse/issues/37631). [#42265](https://github.com/ClickHouse/ClickHouse/pull/42265) ([刘陶峰](https://github.com/taofengliu)).
* Added `multiplyDecimal` and `divideDecimal` functions for decimal operations with fixed precision. [#42438](https://github.com/ClickHouse/ClickHouse/pull/42438) ([Andrey Zvonov](https://github.com/zvonand)).
* Added `system.moves` table with list of currently moving parts. [#42660](https://github.com/ClickHouse/ClickHouse/pull/42660) ([Sergei Trifonov](https://github.com/serxa)).
* Keeper feature: add support for embedded Prometheus endpoint. [#43087](https://github.com/ClickHouse/ClickHouse/pull/43087) ([Antonio Andelic](https://github.com/antonio2368)).
* Added age function to calculate difference between two dates or dates with time values expressed as number of full units. Close [#41115](https://github.com/ClickHouse/ClickHouse/issues/41115). [#43123](https://github.com/ClickHouse/ClickHouse/pull/43123) ([Roman Vasin](https://github.com/rvasin)).
* Add settings `max_streams_for_merge_tree_reading` and `allow_asynchronous_read_from_io_pool_for_merge_tree`. Setting `max_streams_for_merge_tree_reading` limits the number of reading streams for MergeTree tables. Setting `allow_asynchronous_read_from_io_pool_for_merge_tree` enables background I/O pool to read from `MergeTree` tables. This may increase performance for I/O bound queries if used together with `max_streams_to_max_threads_ratio` or `max_streams_for_merge_tree_reading`. [#43260](https://github.com/ClickHouse/ClickHouse/pull/43260) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Add the expression of the index on `data_skipping_indices` system table. [#43308](https://github.com/ClickHouse/ClickHouse/pull/43308) ([Guillaume Tassery](https://github.com/YiuRULE)).
* New hash function [xxh3](https://github.com/Cyan4973/xxHash) added. Also performance of `xxHash32` and `xxHash64` improved on arm thanks to library update. [#43411](https://github.com/ClickHouse/ClickHouse/pull/43411) ([Nikita Taranov](https://github.com/nickitat)).
* - Temporary data (for external sorting, aggregation, and JOINs) can share storage with the filesystem cache for remote disks and evict it, close [#42158](https://github.com/ClickHouse/ClickHouse/issues/42158). [#43457](https://github.com/ClickHouse/ClickHouse/pull/43457) ([Vladimir C](https://github.com/vdimir)).
* Add column `engine_full` to system table `databases` so that users can access whole engine definition of database via system tables. [#43468](https://github.com/ClickHouse/ClickHouse/pull/43468) ([凌涛](https://github.com/lingtaolf)).
* Add password complexity rules and checks for creating a new user. [#43719](https://github.com/ClickHouse/ClickHouse/pull/43719) ([Nikolay Degterinsky](https://github.com/evillique)).
* Add function concatWithSeparator , like concat_ws in spark. [#43749](https://github.com/ClickHouse/ClickHouse/pull/43749) ([李扬](https://github.com/taiyang-li)).
* Added constraints for merge tree settings. [#43903](https://github.com/ClickHouse/ClickHouse/pull/43903) ([Sergei Trifonov](https://github.com/serxa)).
* Support numeric literals with _ as separator. [#43925](https://github.com/ClickHouse/ClickHouse/pull/43925) ([jh0x](https://github.com/jh0x)).
* Add a new setting `input_format_json_read_objects_as_strings` that allows to parse nested JSON objects into Strings in all JSON input formats. This setting is disable by default. [#44052](https://github.com/ClickHouse/ClickHouse/pull/44052) ([Kruglov Pavel](https://github.com/Avogar)).
#### Performance Improvement
* Optimisation is getting skipped now if `max_size_to_preallocate_for_aggregation` has too small value. Default value of this setting increased to `10^8`. [#43945](https://github.com/ClickHouse/ClickHouse/pull/43945) ([Nikita Taranov](https://github.com/nickitat)).
#### Improvement
* Support numeric literals with underscores. closes [#28967](https://github.com/ClickHouse/ClickHouse/issues/28967). [#39129](https://github.com/ClickHouse/ClickHouse/pull/39129) ([unbyte](https://github.com/unbyte)).
* Add `FROM table SELECT column` syntax. [#41095](https://github.com/ClickHouse/ClickHouse/pull/41095) ([Nikolay Degterinsky](https://github.com/evillique)).
* This PR changes how followed queries delete parts: truncate table, alter table drop part, alter table drop partition. Now these queries make empty parts which cover old parts. This makes truncate query works without exclusive lock which means concurrent reads aren't locked. Also achieved durability in all those queries. If request is succeeded then no resurrected pars appear later. Note that atomicity is achieved only with transaction scope. [#41145](https://github.com/ClickHouse/ClickHouse/pull/41145) ([Sema Checherinda](https://github.com/CheSema)).
* `SET param_x` query no longer requires manual string serialization for the value of the parameter. For example, query `SET param_a = '[\'a\', \'b\']'` can now be written like `SET param_a = ['a', 'b']`. [#41874](https://github.com/ClickHouse/ClickHouse/pull/41874) ([Nikolay Degterinsky](https://github.com/evillique)).
* `filesystemAvailable` and related functions support one optional argument with disk name, and change `filesystemFree` to `filesystemUnreserved`. Closes [#35076](https://github.com/ClickHouse/ClickHouse/issues/35076). [#42064](https://github.com/ClickHouse/ClickHouse/pull/42064) ([flynn](https://github.com/ucasfl)).
* Increased the default value of search_limit to 256, and added LDAP server config option to change that to an arbitrary value. Closes: [#42276](https://github.com/ClickHouse/ClickHouse/issues/42276). [#42461](https://github.com/ClickHouse/ClickHouse/pull/42461) ([Vasily Nemkov](https://github.com/Enmk)).
* Add cosine distance for annoy. [#42778](https://github.com/ClickHouse/ClickHouse/pull/42778) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
* Allow to remove sensitive information from the exception messages also. Resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#42940](https://github.com/ClickHouse/ClickHouse/pull/42940) ([filimonov](https://github.com/filimonov)).
* Keeper improvement: Add 4lw command `rqld` which can manually assign a node as leader. [#43026](https://github.com/ClickHouse/ClickHouse/pull/43026) ([JackyWoo](https://github.com/JackyWoo)).
* Apply connection timeouts settings for Distributed async INSERT from the query. [#43156](https://github.com/ClickHouse/ClickHouse/pull/43156) ([Azat Khuzhin](https://github.com/azat)).
* unhex function support FixedString arguments. [issue42369](https://github.com/ClickHouse/ClickHouse/issues/42369). [#43207](https://github.com/ClickHouse/ClickHouse/pull/43207) ([DR](https://github.com/freedomDR)).
* Priority is given to deleting completely expired Partsrelated [#42869](https://github.com/ClickHouse/ClickHouse/issues/42869). [#43222](https://github.com/ClickHouse/ClickHouse/pull/43222) ([zhongyuankai](https://github.com/zhongyuankai)).
* Follow-up to https://github.com/ClickHouse/ClickHouse/pull/42484. Mask sensitive information in logs better; mask secret parts in the output of queries `SHOW CREATE TABLE` and `SELECT FROM system.tables`. Also resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#43227](https://github.com/ClickHouse/ClickHouse/pull/43227) ([Vitaly Baranov](https://github.com/vitlibar)).
* Enable compress marks and primary key. [#43288](https://github.com/ClickHouse/ClickHouse/pull/43288) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* resolve issue [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075) . Right now async insert doesn't support deduplication, because multiple small inserts will coexist in one part, which corespond multiple `block id`s. This solution is straitfoward: The change involves: 1. mark offsets for every inserts in every chunk 2. calculate multiple `block_id`s when sinker receive a chunk 3. get block number lock by these `block_id`s 3.1. if fails, remove the dup insert(s) and dup `block_id`(s) from block and recalculate `offsets` agian. 3.2. if succeeds, commit `block_id`'s and other items into keeper a. if fails, do 3.1 b. if succeeds, everything succeeds. [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304) ([Han Fei](https://github.com/hanfei1991)).
* More precise and reactive CPU load indication on client. [#43307](https://github.com/ClickHouse/ClickHouse/pull/43307) ([Sergei Trifonov](https://github.com/serxa)).
* Restrict default access to named collections for user defined in config. It must have explicit `show_named_collections=1` to be able to see them. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Support reading of subcolumns of nested types from storage `S3` and table function `s3` with formats `Parquet`, `Arrow` and `ORC`. [#43329](https://github.com/ClickHouse/ClickHouse/pull/43329) ([chen](https://github.com/xiedeyantu)).
* - Systemd integration now correctly notifies systemd that service is really started and is ready to server requests. [#43400](https://github.com/ClickHouse/ClickHouse/pull/43400) ([Коренберг Марк](https://github.com/socketpair)).
* Add table_uuid to system.parts. [#43404](https://github.com/ClickHouse/ClickHouse/pull/43404) ([Azat Khuzhin](https://github.com/azat)).
* Added client option to display the number of locally processed rows in non-interactive mode (--print-num-processed-rows). [#43407](https://github.com/ClickHouse/ClickHouse/pull/43407) ([jh0x](https://github.com/jh0x)).
* Show read rows while reading from stdin from client. Closes [#43423](https://github.com/ClickHouse/ClickHouse/issues/43423). [#43442](https://github.com/ClickHouse/ClickHouse/pull/43442) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Keeper improvement: try syncing logs to disk in parallel with replication. [#43450](https://github.com/ClickHouse/ClickHouse/pull/43450) ([Antonio Andelic](https://github.com/antonio2368)).
* Show progress bar while reading from s3 table function / engine. [#43454](https://github.com/ClickHouse/ClickHouse/pull/43454) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Progress bar will show both read and written rows. [#43496](https://github.com/ClickHouse/ClickHouse/pull/43496) ([Ilya Yatsishin](https://github.com/qoega)).
* Implement `aggregation-in-order` optimization on top of query plan. It is enabled by default (but works only together with `optimize_aggregation_in_order`, which is disabled by default). Set `query_plan_aggregation_in_order = 0` to use previous AST-based version. [#43592](https://github.com/ClickHouse/ClickHouse/pull/43592) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Allow to send profile events with `trace_type = 'ProfileEvent'` to `system.trace_log` on each increment with current stack, profile event name and value of increment. It can be enabled by setting `trace_profile_events` and used to debug performance of queries. [#43639](https://github.com/ClickHouse/ClickHouse/pull/43639) ([Anton Popov](https://github.com/CurtizJ)).
* Keeper improvement: requests are batched more often. The batching can be controlled with the new setting `max_requests_quick_batch_size`. [#43686](https://github.com/ClickHouse/ClickHouse/pull/43686) ([Antonio Andelic](https://github.com/antonio2368)).
* Added possibility to use array as a second parameter for cutURLParameter function. Close [#6827](https://github.com/ClickHouse/ClickHouse/issues/6827). [#43788](https://github.com/ClickHouse/ClickHouse/pull/43788) ([Roman Vasin](https://github.com/rvasin)).
* Implement referential dependencies and use them to create tables in the correct order while restoring from a backup. [#43834](https://github.com/ClickHouse/ClickHouse/pull/43834) ([Vitaly Baranov](https://github.com/vitlibar)).
* Add a new setting `input_format_max_binary_string_size` to limit string size in RowBinary format. [#43842](https://github.com/ClickHouse/ClickHouse/pull/43842) ([Kruglov Pavel](https://github.com/Avogar)).
* - Fix some incorrect logic in ast level optimization related. [#43873](https://github.com/ClickHouse/ClickHouse/pull/43873) ([Duc Canh Le](https://github.com/canhld94)).
* Support query like `SHOW FULL TABLES ...`. [#43910](https://github.com/ClickHouse/ClickHouse/pull/43910) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
* When ClickHouse requests a remote HTTP server, and it returns an error, the numeric HTTP code was not displayed correctly in the exception message. Closes [#43919](https://github.com/ClickHouse/ClickHouse/issues/43919). [#43920](https://github.com/ClickHouse/ClickHouse/pull/43920) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem/merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem` did not respect adaptive granularity. Fat rows did not decrease the number of read rows (as it is was done for `merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read`, which could lead to high memory usage. [#43965](https://github.com/ClickHouse/ClickHouse/pull/43965) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Support `optimize_if_transform_strings_to_enum` in new analyzer. [#43999](https://github.com/ClickHouse/ClickHouse/pull/43999) ([Antonio Andelic](https://github.com/antonio2368)).
* This is to upgrade the new "DeflateQpl" compression codec which has been implemented on previous PR (details: https://github.com/ClickHouse/ClickHouse/pull/39494). This patch improves codec on below aspects: 1. QPL v0.2.0 to QPL v0.3.0 [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl) 2. Improve CMake file for fixing QPL build issues for QPL v0.3.0。 3. Link the QPL library with libaccel-config at build time instead of runtime loading on QPL v0.2.0 (dlopen) 4. Fixed log print issue in CompressionCodecDeflateQpl.cpp. [#44024](https://github.com/ClickHouse/ClickHouse/pull/44024) ([jasperzhu](https://github.com/jinjunzh)).
* Follow-up to https://github.com/ClickHouse/ClickHouse/pull/43834 Fix review issues; dependencies from `Distributed` table engine and from `cluster()` function are also considered now; as well as dependencies of a dictionary defined without host & port specified. [#44158](https://github.com/ClickHouse/ClickHouse/pull/44158) ([Vitaly Baranov](https://github.com/vitlibar)).
#### Bug Fix
* Fix mutations not making progress when checksums do not match between replicas (e.g. caused by a change in data format on an upgrade). [#36877](https://github.com/ClickHouse/ClickHouse/pull/36877) ([nvartolomei](https://github.com/nvartolomei)).
* fix skip_unavailable_shards does not work using hdfsCluster table function. [#43236](https://github.com/ClickHouse/ClickHouse/pull/43236) ([chen](https://github.com/xiedeyantu)).
* fix s3 support question mark wildcard. Closes [#42731](https://github.com/ClickHouse/ClickHouse/issues/42731). [#43253](https://github.com/ClickHouse/ClickHouse/pull/43253) ([chen](https://github.com/xiedeyantu)).
* - Fix functions arrayFirstOrNull and arrayLastOrNull or null when array is Nullable. [#43274](https://github.com/ClickHouse/ClickHouse/pull/43274) ([Duc Canh Le](https://github.com/canhld94)).
* - we create a new zk path called "async_blocks" for replicated tables in [#43304](https://github.com/ClickHouse/ClickHouse/issues/43304) . However, for tables created in older versions, this path does not exist and will cause error when doing partition operations. This PR will create this node when initializing replicated tree. - This PR created a flag `async_insert_deduplicate` with `false` default value to control whether to use this function. As mentioned in [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075) , this function is not yet fully finished. I would turn off it by default. [#44223](https://github.com/ClickHouse/ClickHouse/pull/44223) ([Han Fei](https://github.com/hanfei1991)).
#### Build/Testing/Packaging Improvement
* Add support for FreeBSD/powerpc64le. [#40422](https://github.com/ClickHouse/ClickHouse/pull/40422) ([pkubaj](https://github.com/pkubaj)).
* Bump Testcontainers for Go to v0.15.0. [#43278](https://github.com/ClickHouse/ClickHouse/pull/43278) ([Manuel de la Peña](https://github.com/mdelapenya)).
* ... Enable base64 on s390x > Information about CI checks: https://clickhouse.com/docs/en/development/continuous-integration/. [#43352](https://github.com/ClickHouse/ClickHouse/pull/43352) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
* Shutdown will be much faster if do not call clearOldPartsFromFilesystem. Especially this is right for tests with zero-copy due to single thread deletion parts. clearOldPartsFromFilesystem is unnecessary after https://github.com/ClickHouse/ClickHouse/pull/41145. [#43760](https://github.com/ClickHouse/ClickHouse/pull/43760) ([Sema Checherinda](https://github.com/CheSema)).
* Integrate skim into the client/local. [#43922](https://github.com/ClickHouse/ClickHouse/pull/43922) ([Azat Khuzhin](https://github.com/azat)).
* Allow clickhouse to use openssl as a dynamic library and in-tree for development purposes. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)).
* Closes [#43912](https://github.com/ClickHouse/ClickHouse/issues/43912). [#43992](https://github.com/ClickHouse/ClickHouse/pull/43992) ([Nikolay Degterinsky](https://github.com/evillique)).
* Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Kill stress tests after 2.5h in case of hanging process. [#44214](https://github.com/ClickHouse/ClickHouse/pull/44214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Fixed unable to log in (because of failure to create session_log entry) in rare case of messed up setting profiles. ... [#42641](https://github.com/ClickHouse/ClickHouse/pull/42641) ([Vasily Nemkov](https://github.com/Enmk)).
* Fix incorrect UserTimeMicroseconds/SystemTimeMicroseconds accounting. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)).
* Do not suppress exceptions in web disk. Fix retries for web disk. [#42800](https://github.com/ClickHouse/ClickHouse/pull/42800) ([Azat Khuzhin](https://github.com/azat)).
* Fixed race condition between inserts and dropping MVs. [#43161](https://github.com/ClickHouse/ClickHouse/pull/43161) ([AlfVII](https://github.com/AlfVII)).
* Fixed bug which could lead to deadlock while using asynchronous inserts. [#43233](https://github.com/ClickHouse/ClickHouse/pull/43233) ([Anton Popov](https://github.com/CurtizJ)).
* Additional check on zero uncompressed size is added to `CompressionCodecDelta`. [#43255](https://github.com/ClickHouse/ClickHouse/pull/43255) ([Nikita Taranov](https://github.com/nickitat)).
* An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix bad cast from LowCardinality column when using short circuit function execution. Proper fix of https://github.com/ClickHouse/ClickHouse/pull/42937. [#43311](https://github.com/ClickHouse/ClickHouse/pull/43311) ([Kruglov Pavel](https://github.com/Avogar)).
* Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix `DESCRIBE` for `deltaLake` and `hudi` table functions. [#43323](https://github.com/ClickHouse/ClickHouse/pull/43323) ([Antonio Andelic](https://github.com/antonio2368)).
* Check and compare the content of `format_version` file in `MergeTreeData` so tables can be loaded even if the storage policy was changed. [#43328](https://github.com/ClickHouse/ClickHouse/pull/43328) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix possible (very unlikely) "No column to rollback" logical error during INSERT into Buffer. [#43336](https://github.com/ClickHouse/ClickHouse/pull/43336) ([Azat Khuzhin](https://github.com/azat)).
* Fix a bug that allowed FucntionParser to parse an unlimited amount of round brackets into one function if `allow_function_parameters` is set. [#43350](https://github.com/ClickHouse/ClickHouse/pull/43350) ([Nikolay Degterinsky](https://github.com/evillique)).
* MaterializeMySQL support ddl drop table t1,t2 and Compatible with most of MySQL drop ddl. [#43366](https://github.com/ClickHouse/ClickHouse/pull/43366) ([zzsmdfj](https://github.com/zzsmdfj)).
* Fix possible `Cannot create non-empty column with type Nothing` in functions if/multiIf. Closes [#43356](https://github.com/ClickHouse/ClickHouse/issues/43356). [#43368](https://github.com/ClickHouse/ClickHouse/pull/43368) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix a bug when row level filter uses default value of column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)).
* Query with DISTINCT + LIMIT BY + LIMIT can return fewer rows than expected. Fixes [#43377](https://github.com/ClickHouse/ClickHouse/issues/43377). [#43410](https://github.com/ClickHouse/ClickHouse/pull/43410) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix sumMap() for Nullable(Decimal()). [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)).
* Fix date_diff() for hour/minute on macOS. Close [#42742](https://github.com/ClickHouse/ClickHouse/issues/42742). [#43466](https://github.com/ClickHouse/ClickHouse/pull/43466) ([zzsmdfj](https://github.com/zzsmdfj)).
* Fix incorrect memory accounting because of merges/mutations. [#43516](https://github.com/ClickHouse/ClickHouse/pull/43516) ([Azat Khuzhin](https://github.com/azat)).
* Substitute UDFs in `CREATE` query to avoid failures during loading at the startup. Additionally, UDFs can now be used as `DEFAULT` expressions for columns. [#43539](https://github.com/ClickHouse/ClickHouse/pull/43539) ([Antonio Andelic](https://github.com/antonio2368)).
* Correctly report errors in queries even when multiple JOINs optimization is taking place. [#43583](https://github.com/ClickHouse/ClickHouse/pull/43583) ([Salvatore](https://github.com/tbsal)).
* Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)).
* - Ensure consistency when copier update status and `attach_is_done` in keeper after partition attach is done. [#43602](https://github.com/ClickHouse/ClickHouse/pull/43602) ([lizhuoyu5](https://github.com/lzydmxy)).
* During recovering of the lost replica there could a situation where we need to atomically swap two table names (use EXCHANGE), but instead previously we tried to use two RENAME queries. Which was obviously failed and moreover failed the whole recovery process of the database replica. [#43628](https://github.com/ClickHouse/ClickHouse/pull/43628) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* fix s3Cluster function returns NOT_FOUND_COLUMN_IN_BLOCK error. Closes [#43534](https://github.com/ClickHouse/ClickHouse/issues/43534). [#43629](https://github.com/ClickHouse/ClickHouse/pull/43629) ([chen](https://github.com/xiedeyantu)).
* Optimized number of List requests to ZooKeeper when selecting a part to merge. Previously it could produce thousands of requests in some cases. Fixes [#43647](https://github.com/ClickHouse/ClickHouse/issues/43647). [#43675](https://github.com/ClickHouse/ClickHouse/pull/43675) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix posssible logical error 'Array sizes mismatched' while parsing JSON object with arrays with same key names but with different nesting level. Closes [#43569](https://github.com/ClickHouse/ClickHouse/issues/43569). [#43693](https://github.com/ClickHouse/ClickHouse/pull/43693) ([Kruglov Pavel](https://github.com/Avogar)).
* Fixed possible exception in case of distributed group by with an alias column among aggregation keys. [#43709](https://github.com/ClickHouse/ClickHouse/pull/43709) ([Nikita Taranov](https://github.com/nickitat)).
* Fix bug which can lead to broken projections if zero-copy replication is enabled and used. [#43764](https://github.com/ClickHouse/ClickHouse/pull/43764) ([alesapin](https://github.com/alesapin)).
* - Fix using multipart upload for large S3 objects in AWS S3. [#43824](https://github.com/ClickHouse/ClickHouse/pull/43824) ([ianton-ru](https://github.com/ianton-ru)).
* Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)).
* * Fix logical error in right storage join with using. [#43963](https://github.com/ClickHouse/ClickHouse/pull/43963) ([Vladimir C](https://github.com/vdimir)).
* Keeper fix: throw if interserver port for Raft is already in use. Fix segfault in Prometheus when Raft server failed to initialize. [#43984](https://github.com/ClickHouse/ClickHouse/pull/43984) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix order by positional arg in case unneeded columns pruning. Closes [#43964](https://github.com/ClickHouse/ClickHouse/issues/43964). [#43987](https://github.com/ClickHouse/ClickHouse/pull/43987) ([Kseniia Sumarokova](https://github.com/kssenii)).
* * Fix bug with wrong order of keys in Storage Join. [#44012](https://github.com/ClickHouse/ClickHouse/pull/44012) ([Vladimir C](https://github.com/vdimir)).
* Fixed exception when subquery contains having but doesn't contain actual aggregation. [#44051](https://github.com/ClickHouse/ClickHouse/pull/44051) ([Nikita Taranov](https://github.com/nickitat)).
* Fix race in s3 multipart upload. This race could cause the error `Part number must be an integer between 1 and 10000, inclusive. (S3_ERROR)` while restoring from a backup. [#44065](https://github.com/ClickHouse/ClickHouse/pull/44065) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Prevent dropping nested column if it creates empty part. [#44159](https://github.com/ClickHouse/ClickHouse/pull/44159) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix `LOGICAL_ERROR` in case when fetch of part was stopped while fetching projection to the disk with enabled zero-copy replication. [#44173](https://github.com/ClickHouse/ClickHouse/pull/44173) ([Anton Popov](https://github.com/CurtizJ)).
* Fix possible Bad cast from type DB::IAST const* to DB::ASTLiteral const*. Closes [#44191](https://github.com/ClickHouse/ClickHouse/issues/44191). [#44192](https://github.com/ClickHouse/ClickHouse/pull/44192) ([Kruglov Pavel](https://github.com/Avogar)).
* Prevent `ReadonlyReplica` metric from having negative values. [#44220](https://github.com/ClickHouse/ClickHouse/pull/44220) ([Antonio Andelic](https://github.com/antonio2368)).
#### Build Improvement
* Fixed Endian issues in hex string conversion on s390x (which is not supported by ClickHouse). [#41245](https://github.com/ClickHouse/ClickHouse/pull/41245) ([Harry Lee](https://github.com/HarryLeeIBM)).
* ... toDateTime64 conversion generates wrong time on z build, add bit_cast swap fix to support toDateTime64 on s390x platform. [#42847](https://github.com/ClickHouse/ClickHouse/pull/42847) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
* ... s390x support for ip coding functions. [#43078](https://github.com/ClickHouse/ClickHouse/pull/43078) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
* Fix byte order issue of wide integers for s390x. [#43228](https://github.com/ClickHouse/ClickHouse/pull/43228) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Fixed endian issue in bloom filter serialization for s390x. [#43642](https://github.com/ClickHouse/ClickHouse/pull/43642) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Fixed setting TCP_KEEPIDLE of client connection for s390x. [#43850](https://github.com/ClickHouse/ClickHouse/pull/43850) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Fix endian issue in StringHashTable for s390x. [#44049](https://github.com/ClickHouse/ClickHouse/pull/44049) ([Harry Lee](https://github.com/HarryLeeIBM)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Revert "S3 request per second rate throttling""'. [#43335](https://github.com/ClickHouse/ClickHouse/pull/43335) ([Sergei Trifonov](https://github.com/serxa)).
* NO CL ENTRY: 'Update version after release'. [#43348](https://github.com/ClickHouse/ClickHouse/pull/43348) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* NO CL ENTRY: 'Revert "Add table_uuid to system.parts"'. [#43571](https://github.com/ClickHouse/ClickHouse/pull/43571) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Fix endian issue in integer hex string conversion"'. [#43613](https://github.com/ClickHouse/ClickHouse/pull/43613) ([Vladimir C](https://github.com/vdimir)).
* NO CL ENTRY: 'Update replication.md'. [#43643](https://github.com/ClickHouse/ClickHouse/pull/43643) ([Peignon Melvyn](https://github.com/melvynator)).
* NO CL ENTRY: 'Revert "Temporary files evict fs cache"'. [#43883](https://github.com/ClickHouse/ClickHouse/pull/43883) ([Vladimir C](https://github.com/vdimir)).
* NO CL ENTRY: 'Update html interface doc'. [#44064](https://github.com/ClickHouse/ClickHouse/pull/44064) ([San](https://github.com/santrancisco)).
* NO CL ENTRY: 'Revert "Add function 'age'"'. [#44203](https://github.com/ClickHouse/ClickHouse/pull/44203) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Builtin skim"'. [#44227](https://github.com/ClickHouse/ClickHouse/pull/44227) ([Azat Khuzhin](https://github.com/azat)).
* NO CL ENTRY: 'Revert "Add information about written rows in progress indicator"'. [#44255](https://github.com/ClickHouse/ClickHouse/pull/44255) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Build libcxx and libcxxabi from llvm-project [#42730](https://github.com/ClickHouse/ClickHouse/pull/42730) ([Robert Schulze](https://github.com/rschu1ze)).
* Allow release only from ready commits [#43019](https://github.com/ClickHouse/ClickHouse/pull/43019) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add global flags to base/ libraries [#43082](https://github.com/ClickHouse/ClickHouse/pull/43082) ([Raúl Marín](https://github.com/Algunenano)).
* Enable strict typing check in tests/ci [#43132](https://github.com/ClickHouse/ClickHouse/pull/43132) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add server UUID for disks access checks (read/read-by-offset/write/delete) to avoid possible races [#43143](https://github.com/ClickHouse/ClickHouse/pull/43143) ([Azat Khuzhin](https://github.com/azat)).
* Do not include libcxx library for C [#43166](https://github.com/ClickHouse/ClickHouse/pull/43166) ([Azat Khuzhin](https://github.com/azat)).
* Followup fixes for FuseFunctionsPass [#43217](https://github.com/ClickHouse/ClickHouse/pull/43217) ([Vladimir C](https://github.com/vdimir)).
* Fix bug in replication queue which can lead to premature mutation finish [#43231](https://github.com/ClickHouse/ClickHouse/pull/43231) ([alesapin](https://github.com/alesapin)).
* Support `CREATE / ALTER / DROP NAMED COLLECTION` queries under according access types [#43252](https://github.com/ClickHouse/ClickHouse/pull/43252) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix race in `IColumn::dumpStructure` [#43269](https://github.com/ClickHouse/ClickHouse/pull/43269) ([Anton Popov](https://github.com/CurtizJ)).
* Sanitize thirdparty libraries for public flags [#43275](https://github.com/ClickHouse/ClickHouse/pull/43275) ([Azat Khuzhin](https://github.com/azat)).
* stress: increase timeout for server waiting after TERM [#43277](https://github.com/ClickHouse/ClickHouse/pull/43277) ([Azat Khuzhin](https://github.com/azat)).
* Fix cloning of ASTIdentifier [#43282](https://github.com/ClickHouse/ClickHouse/pull/43282) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix race on write in `ReplicatedMergeTree` [#43289](https://github.com/ClickHouse/ClickHouse/pull/43289) ([Antonio Andelic](https://github.com/antonio2368)).
* Cancel lambda api url [#43295](https://github.com/ClickHouse/ClickHouse/pull/43295) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fixed: Typo [#43312](https://github.com/ClickHouse/ClickHouse/pull/43312) ([Raevsky Rudolf](https://github.com/lanesket)).
* Analyzer small fixes [#43321](https://github.com/ClickHouse/ClickHouse/pull/43321) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix: make test_read_only_table more stable [#43326](https://github.com/ClickHouse/ClickHouse/pull/43326) ([Igor Nikonov](https://github.com/devcrafter)).
* Make insertRangeFrom() more exception safe [#43338](https://github.com/ClickHouse/ClickHouse/pull/43338) ([Azat Khuzhin](https://github.com/azat)).
* Analyzer added indexes support [#43341](https://github.com/ClickHouse/ClickHouse/pull/43341) ([Maksim Kita](https://github.com/kitaisreal)).
* Allow to "drop tables" from s3_plain disk (so as from web disk) [#43343](https://github.com/ClickHouse/ClickHouse/pull/43343) ([Azat Khuzhin](https://github.com/azat)).
* Add --max-consecutive-errors for clickhouse-benchmark [#43344](https://github.com/ClickHouse/ClickHouse/pull/43344) ([Azat Khuzhin](https://github.com/azat)).
* Add [#43072](https://github.com/ClickHouse/ClickHouse/issues/43072) [#43345](https://github.com/ClickHouse/ClickHouse/pull/43345) ([Nikita Taranov](https://github.com/nickitat)).
* Suggest users installation troubleshooting [#43346](https://github.com/ClickHouse/ClickHouse/pull/43346) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update version_date.tsv and changelogs after v22.11.1.1360-stable [#43349](https://github.com/ClickHouse/ClickHouse/pull/43349) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Provide full stacktrace in case of uncaught exception during server startup [#43364](https://github.com/ClickHouse/ClickHouse/pull/43364) ([Azat Khuzhin](https://github.com/azat)).
* Update SECURITY.md on new stable tags [#43365](https://github.com/ClickHouse/ClickHouse/pull/43365) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Splitting checks in CI more [#43373](https://github.com/ClickHouse/ClickHouse/pull/43373) ([alesapin](https://github.com/alesapin)).
* Update version_date.tsv and changelogs after v22.8.9.24-lts [#43393](https://github.com/ClickHouse/ClickHouse/pull/43393) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix mess with signed sizes in SingleValueDataString [#43401](https://github.com/ClickHouse/ClickHouse/pull/43401) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add a comment [#43403](https://github.com/ClickHouse/ClickHouse/pull/43403) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Avoid race condition for updating system.distribution_queue values [#43406](https://github.com/ClickHouse/ClickHouse/pull/43406) ([Azat Khuzhin](https://github.com/azat)).
* Fix flaky 01926_order_by_desc_limit [#43408](https://github.com/ClickHouse/ClickHouse/pull/43408) ([Azat Khuzhin](https://github.com/azat)).
* Fix possible heap-use-after-free in local if history file cannot be created [#43409](https://github.com/ClickHouse/ClickHouse/pull/43409) ([Azat Khuzhin](https://github.com/azat)).
* Fix flaky test [#43435](https://github.com/ClickHouse/ClickHouse/pull/43435) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix backward compatibility check [#43436](https://github.com/ClickHouse/ClickHouse/pull/43436) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix typo [#43446](https://github.com/ClickHouse/ClickHouse/pull/43446) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove noise from logs about NetLink in Docker [#43447](https://github.com/ClickHouse/ClickHouse/pull/43447) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Modify test slightly [#43448](https://github.com/ClickHouse/ClickHouse/pull/43448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Set run_passes to 1 by default [#43451](https://github.com/ClickHouse/ClickHouse/pull/43451) ([Dmitry Novik](https://github.com/novikd)).
* Do not reuse jemalloc memory in test_global_overcommit [#43453](https://github.com/ClickHouse/ClickHouse/pull/43453) ([Dmitry Novik](https://github.com/novikd)).
* Fix createTableSharedID again [#43458](https://github.com/ClickHouse/ClickHouse/pull/43458) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Use smaller buffer for small files [#43460](https://github.com/ClickHouse/ClickHouse/pull/43460) ([Alexander Gololobov](https://github.com/davenger)).
* Merging [#42064](https://github.com/ClickHouse/ClickHouse/issues/42064) [#43461](https://github.com/ClickHouse/ClickHouse/pull/43461) ([Anton Popov](https://github.com/CurtizJ)).
* Use all parameters with prefixes from ssm [#43467](https://github.com/ClickHouse/ClickHouse/pull/43467) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Avoid possible DROP hung due to attached web disk [#43489](https://github.com/ClickHouse/ClickHouse/pull/43489) ([Azat Khuzhin](https://github.com/azat)).
* Improve fuzzy search in clickhouse-client/clickhouse-local [#43498](https://github.com/ClickHouse/ClickHouse/pull/43498) ([Azat Khuzhin](https://github.com/azat)).
* check ast limits for create_parser_fuzzer [#43504](https://github.com/ClickHouse/ClickHouse/pull/43504) ([Sema Checherinda](https://github.com/CheSema)).
* Add another test for SingleDataValueString [#43514](https://github.com/ClickHouse/ClickHouse/pull/43514) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Move password reset message from client to server [#43517](https://github.com/ClickHouse/ClickHouse/pull/43517) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Sync everything to persistent storage to avoid writeback affects perf tests [#43530](https://github.com/ClickHouse/ClickHouse/pull/43530) ([Azat Khuzhin](https://github.com/azat)).
* bump lib for diag [#43538](https://github.com/ClickHouse/ClickHouse/pull/43538) ([Dale McDiarmid](https://github.com/gingerwizard)).
* Temporarily disable `test_hive_query` [#43542](https://github.com/ClickHouse/ClickHouse/pull/43542) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Analyzer SumIfToCountIfPass fix [#43543](https://github.com/ClickHouse/ClickHouse/pull/43543) ([Maksim Kita](https://github.com/kitaisreal)).
* Analyzer UniqInjectiveFunctionsEliminationPass [#43547](https://github.com/ClickHouse/ClickHouse/pull/43547) ([Maksim Kita](https://github.com/kitaisreal)).
* Disable broken 00176_bson_parallel_parsing [#43550](https://github.com/ClickHouse/ClickHouse/pull/43550) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add benchmark for query interpretation with JOINs [#43556](https://github.com/ClickHouse/ClickHouse/pull/43556) ([Raúl Marín](https://github.com/Algunenano)).
* Analyzer table functions untuple fix [#43572](https://github.com/ClickHouse/ClickHouse/pull/43572) ([Maksim Kita](https://github.com/kitaisreal)).
* Prepare CI for universal runners preallocated pool [#43579](https://github.com/ClickHouse/ClickHouse/pull/43579) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Iterate list without index-based access [#43584](https://github.com/ClickHouse/ClickHouse/pull/43584) ([Alexander Gololobov](https://github.com/davenger)).
* Remove code that I do not understand [#43593](https://github.com/ClickHouse/ClickHouse/pull/43593) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add table_uuid to system.parts (resubmit) [#43595](https://github.com/ClickHouse/ClickHouse/pull/43595) ([Azat Khuzhin](https://github.com/azat)).
* Move perf tests for Aarch64 from PRs to master [#43623](https://github.com/ClickHouse/ClickHouse/pull/43623) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix flaky 01175_distributed_ddl_output_mode_long [#43626](https://github.com/ClickHouse/ClickHouse/pull/43626) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Speedup backup config loading [#43627](https://github.com/ClickHouse/ClickHouse/pull/43627) ([Alexander Gololobov](https://github.com/davenger)).
* Fix [#43478](https://github.com/ClickHouse/ClickHouse/issues/43478) [#43636](https://github.com/ClickHouse/ClickHouse/pull/43636) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Do not checkout submodules recursively [#43637](https://github.com/ClickHouse/ClickHouse/pull/43637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Optimize binary-builder size [#43654](https://github.com/ClickHouse/ClickHouse/pull/43654) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix flaky `KeeperMap` integration tests [#43658](https://github.com/ClickHouse/ClickHouse/pull/43658) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix data race in `Keeper` snapshot [#43663](https://github.com/ClickHouse/ClickHouse/pull/43663) ([Antonio Andelic](https://github.com/antonio2368)).
* Use docker images cache from merged PRs in master and release branches [#43664](https://github.com/ClickHouse/ClickHouse/pull/43664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update AsynchronousReadIndirectBufferFromRemoteFS.cpp [#43667](https://github.com/ClickHouse/ClickHouse/pull/43667) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix pagination issue in GITHUB_JOB_ID() [#43681](https://github.com/ClickHouse/ClickHouse/pull/43681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Try fix flaky test 00176_bson_parallel_parsing [#43696](https://github.com/ClickHouse/ClickHouse/pull/43696) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix log messages in clickhouse-copier [#43707](https://github.com/ClickHouse/ClickHouse/pull/43707) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* try to remove clickhouse if already exists [#43728](https://github.com/ClickHouse/ClickHouse/pull/43728) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix 43622 [#43731](https://github.com/ClickHouse/ClickHouse/pull/43731) ([Amos Bird](https://github.com/amosbird)).
* Fix example of colored prompt in client [#43738](https://github.com/ClickHouse/ClickHouse/pull/43738) ([Azat Khuzhin](https://github.com/azat)).
* Minor fixes in annoy index documentation [#43743](https://github.com/ClickHouse/ClickHouse/pull/43743) ([Robert Schulze](https://github.com/rschu1ze)).
* Terminate lost runners [#43756](https://github.com/ClickHouse/ClickHouse/pull/43756) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update README.md [#43759](https://github.com/ClickHouse/ClickHouse/pull/43759) ([Tyler Hannan](https://github.com/tylerhannan)).
* Fix included_elements calculation in AggregateFunctionNullVariadic [#43763](https://github.com/ClickHouse/ClickHouse/pull/43763) ([Dmitry Novik](https://github.com/novikd)).
* Migrate runner_token_rotation_lambda to zip-package deployment [#43766](https://github.com/ClickHouse/ClickHouse/pull/43766) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Analyzer compound expression crash fix [#43768](https://github.com/ClickHouse/ClickHouse/pull/43768) ([Maksim Kita](https://github.com/kitaisreal)).
* Migrate termination lambda to zip-package [#43769](https://github.com/ClickHouse/ClickHouse/pull/43769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix flaky `test_store_cleanup` [#43770](https://github.com/ClickHouse/ClickHouse/pull/43770) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Attempt to fix StyleCheck condition [#43773](https://github.com/ClickHouse/ClickHouse/pull/43773) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Rerun PullRequestCI on changed description body [#43777](https://github.com/ClickHouse/ClickHouse/pull/43777) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Yet another fix for AggregateFunctionMinMaxAny [#43778](https://github.com/ClickHouse/ClickHouse/pull/43778) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add google benchmark to contrib [#43779](https://github.com/ClickHouse/ClickHouse/pull/43779) ([Nikita Taranov](https://github.com/nickitat)).
* Fix EN doc as in [#43765](https://github.com/ClickHouse/ClickHouse/issues/43765) [#43780](https://github.com/ClickHouse/ClickHouse/pull/43780) ([Alexander Gololobov](https://github.com/davenger)).
* Detach threads from thread group [#43781](https://github.com/ClickHouse/ClickHouse/pull/43781) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Try making `test_keeper_zookeeper_converter` less flaky [#43789](https://github.com/ClickHouse/ClickHouse/pull/43789) ([Antonio Andelic](https://github.com/antonio2368)).
* Polish UDF substitution visitor [#43790](https://github.com/ClickHouse/ClickHouse/pull/43790) ([Antonio Andelic](https://github.com/antonio2368)).
* Analyzer ConstantNode refactoring [#43793](https://github.com/ClickHouse/ClickHouse/pull/43793) ([Maksim Kita](https://github.com/kitaisreal)).
* Update Poco [#43802](https://github.com/ClickHouse/ClickHouse/pull/43802) ([Alexander Gololobov](https://github.com/davenger)).
* Add another BC check suppression [#43810](https://github.com/ClickHouse/ClickHouse/pull/43810) ([Alexander Tokmakov](https://github.com/tavplubix)).
* tests: fix 01676_long_clickhouse_client_autocomplete flakiness [#43819](https://github.com/ClickHouse/ClickHouse/pull/43819) ([Azat Khuzhin](https://github.com/azat)).
* Use disk operation to serialize and deserialize meta files of StorageFilelog [#43826](https://github.com/ClickHouse/ClickHouse/pull/43826) ([flynn](https://github.com/ucasfl)).
* Add constexpr [#43827](https://github.com/ClickHouse/ClickHouse/pull/43827) ([zhanglistar](https://github.com/zhanglistar)).
* Do not postpone removal of in-memory tables [#43833](https://github.com/ClickHouse/ClickHouse/pull/43833) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Increase some logging level for keeper client. [#43835](https://github.com/ClickHouse/ClickHouse/pull/43835) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* FuseFunctionsPass small fix [#43837](https://github.com/ClickHouse/ClickHouse/pull/43837) ([Maksim Kita](https://github.com/kitaisreal)).
* Followup fixes for XML helpers [#43845](https://github.com/ClickHouse/ClickHouse/pull/43845) ([Alexander Gololobov](https://github.com/davenger)).
* Hold ProcessListEntry a bit longer in case of exception from Interpreter [#43847](https://github.com/ClickHouse/ClickHouse/pull/43847) ([Alexander Tokmakov](https://github.com/tavplubix)).
* A little improve performance of PODArray [#43860](https://github.com/ClickHouse/ClickHouse/pull/43860) ([zhanglistar](https://github.com/zhanglistar)).
* Change email for robot-clickhouse to immutable one [#43861](https://github.com/ClickHouse/ClickHouse/pull/43861) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Rerun DocsCheck on edited PR description [#43862](https://github.com/ClickHouse/ClickHouse/pull/43862) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Temporarily disable misc-* slow clang-tidy checks [#43863](https://github.com/ClickHouse/ClickHouse/pull/43863) ([Robert Schulze](https://github.com/rschu1ze)).
* do not leave tmp part on disk, do not go to the keeper for remove it [#43866](https://github.com/ClickHouse/ClickHouse/pull/43866) ([Sema Checherinda](https://github.com/CheSema)).
* do not read part status just for logging [#43868](https://github.com/ClickHouse/ClickHouse/pull/43868) ([Sema Checherinda](https://github.com/CheSema)).
* Analyzer Context refactoring [#43884](https://github.com/ClickHouse/ClickHouse/pull/43884) ([Maksim Kita](https://github.com/kitaisreal)).
* Analyzer CTE resolution fix [#43893](https://github.com/ClickHouse/ClickHouse/pull/43893) ([Maksim Kita](https://github.com/kitaisreal)).
* Improve release script [#43894](https://github.com/ClickHouse/ClickHouse/pull/43894) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Use only PRs to our repository in pr_info on push [#43895](https://github.com/ClickHouse/ClickHouse/pull/43895) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Join engine works with analyzer [#43897](https://github.com/ClickHouse/ClickHouse/pull/43897) ([Vladimir C](https://github.com/vdimir)).
* Fix reports [#43904](https://github.com/ClickHouse/ClickHouse/pull/43904) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix vim settings (and make it compatible with neovim) [#43909](https://github.com/ClickHouse/ClickHouse/pull/43909) ([Azat Khuzhin](https://github.com/azat)).
* Fix clang tidy errors introduced in [#43834](https://github.com/ClickHouse/ClickHouse/issues/43834) [#43911](https://github.com/ClickHouse/ClickHouse/pull/43911) ([Nikita Taranov](https://github.com/nickitat)).
* Fix BACKUP TO S3 for Google Cloud Storage [#43940](https://github.com/ClickHouse/ClickHouse/pull/43940) ([Azat Khuzhin](https://github.com/azat)).
* Fix tags workflow [#43942](https://github.com/ClickHouse/ClickHouse/pull/43942) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Generate missed changelogs for latest releases [#43944](https://github.com/ClickHouse/ClickHouse/pull/43944) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix typo in tests/ci/bugfix_validate_check.py [#43973](https://github.com/ClickHouse/ClickHouse/pull/43973) ([Vladimir C](https://github.com/vdimir)).
* Remove test logging of signal "EINTR" [#44001](https://github.com/ClickHouse/ClickHouse/pull/44001) ([Kruglov Pavel](https://github.com/Avogar)).
* Some cleanup of isDeterministic(InScopeOfQuery)() [#44011](https://github.com/ClickHouse/ClickHouse/pull/44011) ([Robert Schulze](https://github.com/rschu1ze)).
* Try to keep runners alive for longer [#44015](https://github.com/ClickHouse/ClickHouse/pull/44015) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix relaxed "too many parts" threshold [#44021](https://github.com/ClickHouse/ClickHouse/pull/44021) ([Sergei Trifonov](https://github.com/serxa)).
* Correct CompressionCodecGorilla exception message [#44023](https://github.com/ClickHouse/ClickHouse/pull/44023) ([Duc Canh Le](https://github.com/canhld94)).
* Fix exception message [#44034](https://github.com/ClickHouse/ClickHouse/pull/44034) ([Nikolay Degterinsky](https://github.com/evillique)).
* Update version_date.tsv and changelogs after v22.8.11.15-lts [#44035](https://github.com/ClickHouse/ClickHouse/pull/44035) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* do not hardlink serialization.json in new part [#44036](https://github.com/ClickHouse/ClickHouse/pull/44036) ([Sema Checherinda](https://github.com/CheSema)).
* Fix tracing of profile events [#44045](https://github.com/ClickHouse/ClickHouse/pull/44045) ([Anton Popov](https://github.com/CurtizJ)).
* Slightly better clickhouse disks and remove DiskMemory [#44050](https://github.com/ClickHouse/ClickHouse/pull/44050) ([alesapin](https://github.com/alesapin)).
* Assign release PRs [#44055](https://github.com/ClickHouse/ClickHouse/pull/44055) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Merging [#36877](https://github.com/ClickHouse/ClickHouse/issues/36877) [#44059](https://github.com/ClickHouse/ClickHouse/pull/44059) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* add changelogs [#44061](https://github.com/ClickHouse/ClickHouse/pull/44061) ([Dan Roscigno](https://github.com/DanRoscigno)).
* Fix the CACHE_PATH creation for default value [#44079](https://github.com/ClickHouse/ClickHouse/pull/44079) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix aspell [#44090](https://github.com/ClickHouse/ClickHouse/pull/44090) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix another bug in AggregateFunctionMinMaxAny [#44091](https://github.com/ClickHouse/ClickHouse/pull/44091) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Analyzer aggregate function lambda crash fix [#44098](https://github.com/ClickHouse/ClickHouse/pull/44098) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix -Wshorten-64-to-32 on FreeBSD and enable -Werror [#44121](https://github.com/ClickHouse/ClickHouse/pull/44121) ([Azat Khuzhin](https://github.com/azat)).
* Fix flaky test `02497_trace_events_stress_long` [#44124](https://github.com/ClickHouse/ClickHouse/pull/44124) ([Anton Popov](https://github.com/CurtizJ)).
* Minor file renaming [#44125](https://github.com/ClickHouse/ClickHouse/pull/44125) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix typo [#44127](https://github.com/ClickHouse/ClickHouse/pull/44127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Better descriptions of signals [#44129](https://github.com/ClickHouse/ClickHouse/pull/44129) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* make calls to be sure that parts are deleted [#44156](https://github.com/ClickHouse/ClickHouse/pull/44156) ([Sema Checherinda](https://github.com/CheSema)).
* Ignore "session expired" errors after BC check [#44157](https://github.com/ClickHouse/ClickHouse/pull/44157) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix incorrect assertion [#44160](https://github.com/ClickHouse/ClickHouse/pull/44160) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Close GRPC channels in tests [#44184](https://github.com/ClickHouse/ClickHouse/pull/44184) ([Antonio Andelic](https://github.com/antonio2368)).
* Remove misleading message from logs [#44190](https://github.com/ClickHouse/ClickHouse/pull/44190) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Minor clang-tidy fixes in fromUnixTimestamp64() [#44194](https://github.com/ClickHouse/ClickHouse/pull/44194) ([Igor Nikonov](https://github.com/devcrafter)).
* Hotfix for "check_status.tsv doesn't exists" in stress tests [#44197](https://github.com/ClickHouse/ClickHouse/pull/44197) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix documentation after [#42438](https://github.com/ClickHouse/ClickHouse/issues/42438) [#44200](https://github.com/ClickHouse/ClickHouse/pull/44200) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix an assertion in transactions [#44202](https://github.com/ClickHouse/ClickHouse/pull/44202) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add log message [#44237](https://github.com/ClickHouse/ClickHouse/pull/44237) ([Alexander Tokmakov](https://github.com/tavplubix)).

View File

@ -0,0 +1,37 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.9.7.34-stable (613fe09ca2e) FIXME as compared to v22.9.6.20-stable (ef6343f9579)
#### Bug Fix
* Backported in [#43099](https://github.com/ClickHouse/ClickHouse/issues/43099): Updated normaliser to clone the alias ast. resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
#### Build/Testing/Packaging Improvement
* Backported in [#44111](https://github.com/ClickHouse/ClickHouse/issues/44111): Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#43612](https://github.com/ClickHouse/ClickHouse/issues/43612): Fix bad inefficiency of `remote_filesystem_read_method=read` with filesystem cache. Closes [#42125](https://github.com/ClickHouse/ClickHouse/issues/42125). [#42129](https://github.com/ClickHouse/ClickHouse/pull/42129) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#43526](https://github.com/ClickHouse/ClickHouse/issues/43526): Fix incorrect UserTimeMicroseconds/SystemTimeMicroseconds accounting. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#43518](https://github.com/ClickHouse/ClickHouse/issues/43518): Fix rare possible hung on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#43752](https://github.com/ClickHouse/ClickHouse/issues/43752): An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#43618](https://github.com/ClickHouse/ClickHouse/issues/43618): Fix sumMap() for Nullable(Decimal()). [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#43887](https://github.com/ClickHouse/ClickHouse/issues/43887): Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)).
* Backported in [#44145](https://github.com/ClickHouse/ClickHouse/issues/44145): Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NO CL ENTRY
* NO CL ENTRY: 'Fix multipart upload for large S3 object, backport to 22.9'. [#44219](https://github.com/ClickHouse/ClickHouse/pull/44219) ([ianton-ru](https://github.com/ianton-ru)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Yet another fix for AggregateFunctionMinMaxAny [#43778](https://github.com/ClickHouse/ClickHouse/pull/43778) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Use only PRs to our repository in pr_info on push [#43895](https://github.com/ClickHouse/ClickHouse/pull/43895) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix tags workflow [#43942](https://github.com/ClickHouse/ClickHouse/pull/43942) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Assign release PRs [#44055](https://github.com/ClickHouse/ClickHouse/pull/44055) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix another bug in AggregateFunctionMinMaxAny [#44091](https://github.com/ClickHouse/ClickHouse/pull/44091) ([Alexander Tokmakov](https://github.com/tavplubix)).

View File

@ -1,15 +1,15 @@
---
slug: /en/development/build-cross-osx
sidebar_position: 66
title: How to Build ClickHouse on Linux for Mac OS X
sidebar_label: Build on Linux for Mac OS X
title: How to Build ClickHouse on Linux for macOS
sidebar_label: Build on Linux for macOS
---
This is for the case when you have a Linux machine and want to use it to build `clickhouse` binary that will run on OS X.
This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](../development/build-osx.md).
This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on macOS, then proceed with [another instruction](../development/build-osx.md).
The cross-build for Mac OS X is based on the [Build instructions](../development/build.md), follow them first.
The cross-build for macOS is based on the [Build instructions](../development/build.md), follow them first.
## Install Clang-14

View File

@ -1,9 +1,9 @@
---
slug: /en/development/build-osx
sidebar_position: 65
sidebar_label: Build on Mac OS X
title: How to Build ClickHouse on Mac OS X
description: How to build ClickHouse on Mac OS X
sidebar_label: Build on macOS
title: How to Build ClickHouse on macOS
description: How to build ClickHouse on macOS
---
:::info You don't have to build ClickHouse yourself!

View File

@ -33,6 +33,13 @@ On Ubuntu/Debian you can use the automatic installation script (check [official
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
```
Note: in case of troubles, you can also use this:
```bash
sudo apt-get install software-properties-common
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
```
For other Linux distribution - check the availability of the [prebuild packages](https://releases.llvm.org/download.html) or build clang [from sources](https://clang.llvm.org/get_started.html).
#### Use the latest clang for Builds

View File

@ -7,7 +7,7 @@ description: Prerequisites and an overview of how to build ClickHouse
# Getting Started Guide for Building ClickHouse
The building of ClickHouse is supported on Linux, FreeBSD and Mac OS X.
The building of ClickHouse is supported on Linux, FreeBSD and macOS.
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
@ -194,7 +194,7 @@ In this case, ClickHouse will use config files located in the current directory.
To connect to ClickHouse with clickhouse-client in another terminal navigate to `ClickHouse/build/programs/` and run `./clickhouse client`.
If you get `Connection refused` message on Mac OS X or FreeBSD, try specifying host address 127.0.0.1:
If you get `Connection refused` message on macOS or FreeBSD, try specifying host address 127.0.0.1:
clickhouse client --host 127.0.0.1
@ -213,7 +213,7 @@ You can also run your custom-built ClickHouse binary with the config file from t
## IDE (Integrated Development Environment) {#ide-integrated-development-environment}
If you do not know which IDE to use, we recommend that you use CLion. CLion is commercial software, but it offers 30 days free trial period. It is also free of charge for students. CLion can be used both on Linux and on Mac OS X.
If you do not know which IDE to use, we recommend that you use CLion. CLion is commercial software, but it offers 30 days free trial period. It is also free of charge for students. CLion can be used both on Linux and on macOS.
KDevelop and QTCreator are other great alternatives of an IDE for developing ClickHouse. KDevelop comes in as a very handy IDE although unstable. If KDevelop crashes after a while upon opening project, you should click “Stop All” button as soon as it has opened the list of projects files. After doing so KDevelop should be fine to work with.

View File

@ -139,7 +139,7 @@ If the system clickhouse-server is already running and you do not want to stop i
Build tests allow to check that build is not broken on various alternative configurations and on some foreign systems. These tests are automated as well.
Examples:
- cross-compile for Darwin x86_64 (Mac OS X)
- cross-compile for Darwin x86_64 (macOS)
- cross-compile for FreeBSD x86_64
- cross-compile for Linux AArch64
- build on Ubuntu with libraries from system packages (discouraged)

View File

@ -34,7 +34,7 @@ For a description of request parameters, see [request description](../../../sql-
`columns` - a tuple with the names of columns where values will be summarized. Optional parameter.
The columns must be of a numeric type and must not be in the primary key.
If `columns` not specified, ClickHouse summarizes the values in all columns with a numeric data type that are not in the primary key.
If `columns` is not specified, ClickHouse summarizes the values in all columns with a numeric data type that are not in the primary key.
### Query clauses

View File

@ -8,6 +8,10 @@ sidebar_label: Buffer
Buffers the data to write in RAM, periodically flushing it to another table. During the read operation, data is read from the buffer and the other table simultaneously.
:::note
A recommended alternative to the Buffer Table Engine is enabling [asynchronous inserts](/docs/en/guides/best-practices/asyncinserts.md).
:::
``` sql
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
```
@ -24,7 +28,7 @@ Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_
#### num_layers
`num_layers` Parallelism layer. Physically, the table will be represented as `num_layers` of independent buffers. Recommended value: 16.
`num_layers` Parallelism layer. Physically, the table will be represented as `num_layers` of independent buffers.
#### min_time, max_time, min_rows, max_rows, min_bytes, and max_bytes
@ -34,11 +38,11 @@ Conditions for flushing data from the buffer.
#### flush_time, flush_rows, and flush_bytes
Conditions for flushing data from the buffer, that will happen only in background (omitted or zero means no `flush*` parameters).
Conditions for flushing data from the buffer in the background (omitted or zero means no `flush*` parameters).
Data is flushed from the buffer and written to the destination table if all the `min*` conditions or at least one `max*` condition are met.
Also, if at least one `flush*` condition are met flush initiated in background, this is different from `max*`, since `flush*` allows you to configure background flushes separately to avoid adding latency for `INSERT` (into `Buffer`) queries.
Also, if at least one `flush*` condition is met, a flush is initiated in the background. This differs from `max*` since `flush*` allows you to configure background flushes separately to avoid adding latency for `INSERT` queries into Buffer tables.
#### min_time, max_time, and flush_time
@ -52,48 +56,54 @@ Condition for the number of rows in the buffer.
Condition for the number of bytes in the buffer.
During the write operation, data is inserted to a `num_layers` number of random buffers. Or, if the data part to insert is large enough (greater than `max_rows` or `max_bytes`), it is written directly to the destination table, omitting the buffer.
During the write operation, data is inserted into one or more random buffers (configured with `num_layers`). Or, if the data part to insert is large enough (greater than `max_rows` or `max_bytes`), it is written directly to the destination table, omitting the buffer.
The conditions for flushing the data are calculated separately for each of the `num_layers` buffers. For example, if `num_layers = 16` and `max_bytes = 100000000`, the maximum RAM consumption is 1.6 GB.
Example:
``` sql
CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)
CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 1, 10, 100, 10000, 1000000, 10000000, 100000000)
```
Creating a `merge.hits_buffer` table with the same structure as `merge.hits` and using the Buffer engine. When writing to this table, data is buffered in RAM and later written to the merge.hits table. 16 buffers are created. The data in each of them is flushed if either 100 seconds have passed, or one million rows have been written, or 100 MB of data have been written; or if simultaneously 10 seconds have passed and 10,000 rows and 10 MB of data have been written. For example, if just one row has been written, after 100 seconds it will be flushed, no matter what. But if many rows have been written, the data will be flushed sooner.
Creating a `merge.hits_buffer` table with the same structure as `merge.hits` and using the Buffer engine. When writing to this table, data is buffered in RAM and later written to the merge.hits table. A single buffer is created and the data is flushed if either:
- 100 seconds have passed since the last flush (`max_time`) or
- 1 million rows have been written (`max_rows`) or
- 100 MB of data have been written (`max_bytes`) or
- 10 seconds have passed (`min_time`) and 10,000 rows (`min_rows`) and 10 MB (`min_bytes`) of data have been written
When the server is stopped, with `DROP TABLE` or `DETACH TABLE`, buffer data is also flushed to the destination table.
For example, if just one row has been written, after 100 seconds, it will be flushed, no matter what. But if many rows have been written, the data will be flushed sooner.
When the server is stopped, with `DROP TABLE` or `DETACH TABLE`, buffered data is also flushed to the destination table.
You can set empty strings in single quotation marks for the database and table name. This indicates the absence of a destination table. In this case, when the data flush conditions are reached, the buffer is simply cleared. This may be useful for keeping a window of data in memory.
When reading from a Buffer table, data is processed both from the buffer and from the destination table (if there is one).
Note that the Buffer tables does not support an index. In other words, data in the buffer is fully scanned, which might be slow for large buffers. (For data in a subordinate table, the index that it supports will be used.)
Note that the Buffer table does not support an index. In other words, data in the buffer is fully scanned, which might be slow for large buffers. (For data in a subordinate table, the index that it supports will be used.)
If the set of columns in the Buffer table does not match the set of columns in a subordinate table, a subset of columns that exist in both tables is inserted.
If the types do not match for one of the columns in the Buffer table and a subordinate table, an error message is entered in the server log, and the buffer is cleared.
The same thing happens if the subordinate table does not exist when the buffer is flushed.
The same happens if the subordinate table does not exist when the buffer is flushed.
:::warning
Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table.
Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating is the only option. Check that this error is fixed in your release before trying to run ALTER on the Buffer table.
:::
If the server is restarted abnormally, the data in the buffer is lost.
`FINAL` and `SAMPLE` do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table.
`FINAL` and `SAMPLE` do not work correctly for Buffer tables. These conditions are passed to the destination table but are not used for processing data in the buffer. If these features are required, we recommend only using the Buffer table for writing while reading from the destination table.
When adding data to a Buffer, one of the buffers is locked. This causes delays if a read operation is simultaneously being performed from the table.
When adding data to a Buffer table, one of the buffers is locked. This causes delays if a read operation is simultaneously being performed from the table.
Data that is inserted to a Buffer table may end up in the subordinate table in a different order and in different blocks. Because of this, a Buffer table is difficult to use for writing to a CollapsingMergeTree correctly. To avoid problems, you can set `num_layers` to 1.
Data that is inserted into a Buffer table may end up in the subordinate table in a different order and in different blocks. Because of this, a Buffer table is difficult to use for writing to a CollapsingMergeTree correctly. To avoid problems, you can set `num_layers` to 1.
If the destination table is replicated, some expected characteristics of replicated tables are lost when writing to a Buffer table. The random changes to the order of rows and sizes of data parts cause data deduplication to quit working, which means it is not possible to have a reliable exactly once write to replicated tables.
Due to these disadvantages, we can only recommend using a Buffer table in rare cases.
A Buffer table is used when too many INSERTs are received from a large number of servers over a unit of time and data cant be buffered before insertion, which means the INSERTs cant run fast enough.
A Buffer table is used when too many INSERTs are received from a large number of servers over a unit of time, and data cant be buffered before insertion, which means the INSERTs cant run fast enough.
Note that it does not make sense to insert data one row at a time, even for Buffer tables. This will only produce a speed of a few thousand rows per second, while inserting larger blocks of data can produce over a million rows per second (see the section “Performance”).
Note that it does not make sense to insert data one row at a time, even for Buffer tables. This will only produce a speed of a few thousand rows per second while inserting larger blocks of data can produce over a million rows per second.
[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/buffer/) <!--hide-->

View File

@ -6,10 +6,11 @@ slug: /en/install
# Installing ClickHouse
You have two options for getting up and running with ClickHouse:
You have three options for getting up and running with ClickHouse:
- **[ClickHouse Cloud](https://clickhouse.com/cloud/):** the official ClickHouse as a service, - built by, maintained, and supported by the creators of ClickHouse
- **[Self-managed ClickHouse](https://github.com/ClickHouse/ClickHouse):** ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture
- **[ClickHouse Cloud](https://clickhouse.com/cloud/):** The official ClickHouse as a service, - built by, maintained and supported by the creators of ClickHouse
- **[Self-managed ClickHouse](#self-managed-install):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, ARM, or PowerPC64LE CPU architecture
- **[Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/):** Read the guide with the official image in Docker Hub
## ClickHouse Cloud
@ -22,73 +23,49 @@ The quickest and easiest way to get up and running with ClickHouse is to create
Once your Cloud service is provisioned, you will be able to [connect to it](/docs/en/integrations/connect-a-client.md) and start [inserting data](/docs/en/integrations/data-ingestion.md).
:::note
The [Quick Start](/docs/en/quick-start.mdx) walks through the steps to get a ClickHouse Cloud service up and running, connecting to it, and inserting data.
:::
## Self-Managed Requirements
### CPU Architecture
ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture.
Official pre-built binaries are typically compiled for x86_64 and leverage SSE 4.2 instruction set, so unless otherwise stated usage of CPU that supports it becomes an additional system requirement. Heres the command to check if current CPU has support for SSE 4.2:
``` bash
$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
```
To run ClickHouse on processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should [build ClickHouse from sources](#from-sources) with proper configuration adjustments.
ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently at configurations with a large number of cores but a lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz.
It is recommended to use **Turbo Boost** and **hyper-threading** technologies. It significantly improves performance with a typical workload.
### RAM {#ram}
We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for processing queries.
The required volume of RAM depends on:
- The complexity of queries.
- The amount of data that is processed in queries.
To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](/docs/en/sql-reference/statements/select/distinct.md#select-distinct), [JOIN](/docs/en/sql-reference/statements/select/join.md#select-join) and other operations you use.
ClickHouse can use external memory for temporary data. See [GROUP BY in External Memory](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details.
### Swap File {#swap-file}
Disable the swap file for production environments.
### Storage Subsystem {#storage-subsystem}
You need to have 2GB of free disk space to install ClickHouse.
The volume of storage required for your data should be calculated separately. Assessment should include:
- Estimation of the data volume.
You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store.
- The data compression coefficient.
To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times.
To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas.
### Network {#network}
If possible, use networks of 10G or higher class.
The network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Besides, network speed affects replication processes.
### Software {#software}
ClickHouse is developed primarily for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system.
## Self-Managed Install
1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable:
```bash
curl https://clickhouse.com/ | sh
```
1. Run the `install` command, which defines a collection of useful symlinks along with the files and folders used by ClickHouse - all of which you can see in the output of the install script:
```bash
sudo ./clickhouse install
```
1. At the end of the install script, you are prompted for a password for the `default` user. Feel free to enter a password, or you can optionally leave it blank:
```response
Creating log directory /var/log/clickhouse-server.
Creating data directory /var/lib/clickhouse.
Creating pid directory /var/run/clickhouse-server.
chown -R clickhouse:clickhouse '/var/log/clickhouse-server'
chown -R clickhouse:clickhouse '/var/run/clickhouse-server'
chown clickhouse:clickhouse '/var/lib/clickhouse'
Enter password for default user:
```
You should see the following output:
```response
ClickHouse has been successfully installed.
Start clickhouse-server with:
sudo clickhouse start
Start clickhouse-client with:
clickhouse-client
```
1. Run the following command to start the ClickHouse server:
```bash
sudo clickhouse start
```
:::tip
The [Quick Start](/docs/en/quick-start.mdx/#step-1-get-clickhouse) walks through the steps to download and run ClickHouse, connect to it, and insert data.
:::
## Available Installation Options {#available-installation-options}
### From DEB Packages {#install-from-deb-packages}
@ -278,50 +255,16 @@ For production environments, its recommended to use the latest `stable`-versi
To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/clickhouse/clickhouse-server/). Those images use official `deb` packages inside.
### Single Binary {#from-single-binary}
You can install ClickHouse on Linux using a single portable binary from the latest commit of the `master` branch: [https://builds.clickhouse.com/master/amd64/clickhouse].
``` bash
curl -O 'https://builds.clickhouse.com/master/amd64/clickhouse' && chmod a+x clickhouse
sudo ./clickhouse install
```
### From Precompiled Binaries for Non-Standard Environments {#from-binaries-non-linux}
For non-Linux operating systems and for AArch64 CPU architecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay).
- [MacOS x86_64](https://builds.clickhouse.com/master/macos/clickhouse)
```bash
curl -O 'https://builds.clickhouse.com/master/macos/clickhouse' && chmod a+x ./clickhouse
```
- [MacOS Aarch64 (Apple Silicon)](https://builds.clickhouse.com/master/macos-aarch64/clickhouse)
```bash
curl -O 'https://builds.clickhouse.com/master/macos-aarch64/clickhouse' && chmod a+x ./clickhouse
```
- [FreeBSD x86_64](https://builds.clickhouse.com/master/freebsd/clickhouse)
```bash
curl -O 'https://builds.clickhouse.com/master/freebsd/clickhouse' && chmod a+x ./clickhouse
```
- [Linux AArch64](https://builds.clickhouse.com/master/aarch64/clickhouse)
```bash
curl -O 'https://builds.clickhouse.com/master/aarch64/clickhouse' && chmod a+x ./clickhouse
```
Run `sudo ./clickhouse install` to install ClickHouse system-wide (also with needed configuration files, configuring users etc.). Then run `sudo clickhouse start` commands to start the clickhouse-server and `clickhouse-client` to connect to it.
Use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data.
### From Sources {#from-sources}
To manually compile ClickHouse, follow the instructions for [Linux](/docs/en/development/build.md) or [Mac OS X](/docs/en/development/build-osx.md).
To manually compile ClickHouse, follow the instructions for [Linux](/docs/en/development/build.md) or [macOS](/docs/en/development/build-osx.md).
You can compile packages and install them or use programs without installing packages. Also by building manually you can disable SSE 4.2 requirement or build for AArch64 CPUs.
You can compile packages and install them or use programs without installing packages.
Client: programs/clickhouse-client
Server: programs/clickhouse-server
Client: <build_directory>/programs/clickhouse-client
Server: <build_directory>/programs/clickhouse-server
Youll need to create a data and metadata folders and `chown` them for the desired user. Their paths can be changed in server config (src/programs/server/config.xml), by default they are:
Youll need to create data and metadata folders manually and `chown` them for the desired user. Their paths can be changed in server config (src/programs/server/config.xml), by default they are:
/var/lib/clickhouse/data/default/
/var/lib/clickhouse/metadata/default/
@ -406,3 +349,42 @@ SELECT 1
**Congratulations, the system works!**
To continue experimenting, you can download one of the test data sets or go through [tutorial](/docs/en/tutorial.md).
## Recommendations for Self-Managed ClickHouse
ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, ARM, or PowerPC64LE CPU architecture.
ClickHouse uses all hardware resources available to process data.
ClickHouse tends to work more efficiently with a large number of cores at a lower clock rate than with fewer cores at a higher clock rate.
We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but queries will then frequently abort.
The required volume of RAM generally depends on:
- The complexity of queries.
- The amount of data that is processed in queries.
To calculate the required volume of RAM, you may estimate the size of temporary data for [GROUP BY](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](/docs/en/sql-reference/statements/select/distinct.md#select-distinct), [JOIN](/docs/en/sql-reference/statements/select/join.md#select-join) and other operations you use.
To reduce memory consumption, ClickHouse can swap temporary data to external storage. See [GROUP BY in External Memory](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details.
We recommend to disable the operating system's swap file in production environments.
The ClickHouse binary requires at least 2.5 GB of disk space for installation.
The volume of storage required for your data may be calculated separately based on
- an estimation of the data volume.
You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store.
- The data compression coefficient.
To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times.
To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas.
For distributed ClickHouse deployments (clustering), we recommend at least 10G class network connectivity.
Network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Besides, network speed affects replication processes.

View File

@ -1415,7 +1415,7 @@ Differs from [PrettySpaceNoEscapes](#prettyspacenoescapes) in that up to 10,000
## RowBinary {#rowbinary}
Formats and parses data by row in binary format. Rows and values are listed consecutively, without separators.
Formats and parses data by row in binary format. Rows and values are listed consecutively, without separators. Because data is in the binary format the delimiter after `FORMAT RowBinary` is strictly specified as next: any number of whitespaces (`' '` - space, code `0x20`; `'\t'` - tab, code `0x09`; `'\f'` - form feed, code `0x0C`) followed by exactly one new line sequence (Windows style `"\r\n"` or Unix style `'\n'`), immediately followed by binary data.
This format is less efficient than the Native format since it is row-based.
Integers use fixed-length little-endian representation. For example, UInt64 uses 8 bytes.

View File

@ -890,7 +890,7 @@ The maximum number of open files.
By default: `maximum`.
We recommend using this option in Mac OS X since the `getrlimit()` function returns an incorrect value.
We recommend using this option in macOS since the `getrlimit()` function returns an incorrect value.
**Example**

View File

@ -92,7 +92,7 @@ Code: 452, e.displayText() = DB::Exception: Setting force_index_by_date should n
**Note:** the `default` profile has special handling: all the constraints defined for the `default` profile become the default constraints, so they restrict all the users until theyre overridden explicitly for these users.
## Constraints on Merge Tree Settings
It is possible to set constraints for [merge tree settings](merge-tree-settings.md). There constraints are applied when table with merge tree engine is created or its storage settings are altered. Name of merge tree setting must be prepended by `merge_tree_` prefix when referenced in `<constraint>` section.
It is possible to set constraints for [merge tree settings](merge-tree-settings.md). These constraints are applied when table with merge tree engine is created or its storage settings are altered. Name of merge tree setting must be prepended by `merge_tree_` prefix when referenced in `<constraints>` section.
**Example:** Forbid to create new tables with explicitly specified `storage_policy`

View File

@ -3447,13 +3447,45 @@ Default value: 2.
## compatibility {#compatibility}
This setting changes other settings according to provided ClickHouse version.
If a behaviour in ClickHouse was changed by using a different default value for some setting, this compatibility setting allows you to use default values from previous versions for all the settings that were not set by the user.
The `compatibility` setting causes ClickHouse to use the default settings of a previous version of ClickHouse, where the previous version is provided as the setting.
This setting takes ClickHouse version number as a string, like `21.3`, `21.8`. Empty value means that this setting is disabled.
If settings are set to non-default values, then those settings are honored (only settings that have not been modified are affected by the `compatibility` setting).
This setting takes a ClickHouse version number as a string, like `22.3`, `22.8`. An empty value means that this setting is disabled.
Disabled by default.
:::note
In ClickHouse Cloud the compatibility setting must be set by ClickHouse Cloud support. Please [open a case](https://clickhouse.cloud/support) to have it set.
:::
## allow_settings_after_format_in_insert {#allow_settings_after_format_in_insert}
Control whether `SETTINGS` after `FORMAT` in `INSERT` queries is allowed or not. It is not recommended to use this, since this may interpret part of `SETTINGS` as values.
Example:
```sql
INSERT INTO FUNCTION null('foo String') SETTINGS max_threads=1 VALUES ('bar');
```
But the following query will work only with `allow_settings_after_format_in_insert`:
```sql
SET allow_settings_after_format_in_insert=1;
INSERT INTO FUNCTION null('foo String') VALUES ('bar') SETTINGS max_threads=1;
```
Possible values:
- 0 — Disallow.
- 1 — Allow.
Default value: `0`.
!!! note "Warning"
Use this setting only for backward compatibility if your use cases depend on old syntax.
# Format settings {#format-settings}
## input_format_skip_unknown_fields {#input_format_skip_unknown_fields}
@ -3588,6 +3620,31 @@ y Nullable(String)
z IPv4
```
## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable}
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
If the setting is enabled, the inferred type will be `Nullable` only if column contains `NULL` in a sample that is parsed during schema inference.
Default value: `false`.
## input_format_try_infer_integers {#input_format_try_infer_integers}
If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from input data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`.
Enabled by default.
## input_format_try_infer_dates {#input_format_try_infer_dates}
If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as dates, the result type will be `Date`, if at least one field was not parsed as date, the result type will be `String`.
Enabled by default.
## input_format_try_infer_datetimes {#input_format_try_infer_datetimes}
If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as datetimes, the result type will be `DateTime64`, if at least one field was not parsed as datetime, the result type will be `String`.
Enabled by default.
## date_time_input_format {#date_time_input_format}
Allows choosing a parser of the text representation of date and time.

View File

@ -607,3 +607,7 @@ dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1')))
Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned.
Data must completely fit into RAM.
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -136,3 +136,7 @@ or
SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15))
...
```
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -824,3 +824,7 @@ Setting fields:
:::note
The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
:::
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -176,3 +176,6 @@ Configuration fields:
- [Functions for working with dictionaries](../../../sql-reference/functions/ext-dict-functions.md).
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -52,3 +52,6 @@ LIFETIME(...) -- Lifetime of dictionary in memory
- [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md) — Structure of the dictionary . A key and attributes that can be retrieved by this key.
- [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) — Frequency of dictionary updates.
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -79,3 +79,6 @@ You can convert values for a small dictionary by describing it in a `SELECT` que
- [Dictionary Key and Fields](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md)
- [Functions for Working with Dictionaries](../../../sql-reference/functions/ext-dict-functions.md)
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -1104,6 +1104,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
| %e | day of the month, space-padded ( 1-31) | &nbsp; 2 |
| %f | fractional second from the fractional part of DateTime64 | 1234560 |
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
| %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |
@ -1143,6 +1144,20 @@ Result:
└────────────────────────────────────────────┘
```
Query:
``` sql
SELECT formatDateTime(toDateTime64('2010-01-04 12:34:56.123456', 7), '%f')
```
Result:
```
┌─formatDateTime(toDateTime64('2010-01-04 12:34:56.123456', 7), '%f')─┐
│ 1234560 │
└─────────────────────────────────────────────────────────────────────┘
```
## dateName
Returns specified part of date.

View File

@ -595,9 +595,9 @@ SELECT xxHash64('')
**Returned value**
A `Uint32` or `Uint64` data type hash value.
A `UInt32` or `UInt64` data type hash value.
Type: `xxHash`.
Type: `UInt32` for `xxHash32` and `UInt64` for `xxHash64`.
**Example**

View File

@ -296,3 +296,7 @@ Another example is the `hostName` function, which returns the name of the server
If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in an any aggregate function or add it to a key in `GROUP BY`.
## Related Content
- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)

View File

@ -58,3 +58,7 @@ Result:
│ 2 │ even │
└────────┴──────────────────────────────────────┘
```
## Related Content
- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)

View File

@ -21,12 +21,11 @@ Subquery is another `SELECT` query that may be specified in parenthesis inside `
When `FINAL` is specified, ClickHouse fully merges the data before returning the result and thus performs all data transformations that happen during merges for the given table engine.
It is applicable when selecting data from tables that use the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-engine family. Also supported for:
It is applicable when selecting data from ReplacingMergeTree, SummingMergeTree, AggregatingMergeTree, CollapsingMergeTree and VersionedCollapsingMergeTree tables.
- [Replicated](../../../engines/table-engines/mergetree-family/replication.md) versions of `MergeTree` engines.
- [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md), and [MaterializedView](../../../engines/table-engines/special/materializedview.md) engines that operate over other engines, provided they were created over `MergeTree`-engine tables.
`SELECT` queries with `FINAL` are executed in parallel. The [max_final_threads](../../../operations/settings/settings.md#max-final-threads) setting limits the number of threads used.
Now `SELECT` queries with `FINAL` are executed in parallel and slightly faster. But there are drawbacks (see below). The [max_final_threads](../../../operations/settings/settings.md#max-final-threads) setting limits the number of threads used.
There are drawbacks to using `FINAL` (see below).
### Drawbacks

View File

@ -77,8 +77,9 @@ Numeric literal tries to be parsed:
Literal value has the smallest type that the value fits in.
For example, 1 is parsed as `UInt8`, but 256 is parsed as `UInt16`. For more information, see [Data types](../sql-reference/data-types/index.md).
Underscores `_` inside numeric literals are ignored and can be used for better readability.
Examples: `1`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`.
Examples: `1`, `10_000_000`, `0xffff_ffff`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`.
### String

View File

@ -948,7 +948,7 @@ $ watch -n1 "clickhouse-client --query='SELECT event, value FROM system.events F
## RowBinary {#rowbinary}
Форматирует и парсит данные по строкам, в бинарном виде. Строки и значения уложены подряд, без разделителей.
Форматирует и парсит данные по строкам, в бинарном виде. Строки и значения уложены подряд, без разделителей. Так как данные представлены в бинарном виде, разделитель после `FORMAT RowBinary` строго определен в следующем виде: любое количество пробелов (`' '` - space, код `0x20`; `'\t'` - tab, код `0x09`; `'\f'` - form feed, код `0x0C`), следующая за этим одна последовательность конца строки (Windows style `"\r\n"` или Unix style `'\n'`), и непосредственно следующие за этим бинарные данные.
Формат менее эффективен, чем формат Native, так как является строковым.
Числа представлены в little endian формате фиксированной длины. Для примера, UInt64 занимает 8 байт.

View File

@ -11,8 +11,8 @@ Wants=time-sync.target
[Service]
Type=notify
# Switching off watchdog is very important for sd_notify to work correctly.
Environment=CLICKHOUSE_WATCHDOG_ENABLE=0
# NOTE: we leave clickhouse watchdog process enabled to be able to see OOM/SIGKILL traces in clickhouse-server.log files.
# If you wish to disable the watchdog and rely on systemd logs just add "Environment=CLICKHOUSE_WATCHDOG_ENABLE=0" line.
User=clickhouse
Group=clickhouse
Restart=always

View File

@ -37,7 +37,7 @@
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <TableFunctions/registerTableFunctions.h>
#include <Storages/registerStorages.h>
#include <Storages/NamedCollectionUtils.h>
#include <Storages/NamedCollections/NamedCollectionUtils.h>
#include <Dictionaries/registerDictionaries.h>
#include <Disks/registerDisks.h>
#include <Formats/registerFormats.h>

View File

@ -60,7 +60,7 @@
#include <Storages/System/attachInformationSchemaTables.h>
#include <Storages/Cache/ExternalDataSourceCache.h>
#include <Storages/Cache/registerRemoteFileMetadatas.h>
#include <Storages/NamedCollectionUtils.h>
#include <Storages/NamedCollections/NamedCollectionUtils.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
#include <Functions/registerFunctions.h>
@ -70,6 +70,8 @@
#include <QueryPipeline/ConnectionCollector.h>
#include <Dictionaries/registerDictionaries.h>
#include <Disks/registerDisks.h>
#include <IO/Resource/registerSchedulerNodes.h>
#include <IO/Resource/registerResourceManagers.h>
#include <Common/Config/ConfigReloader.h>
#include <Server/HTTPHandlerFactory.h>
#include "MetricsTransmitter.h"
@ -287,7 +289,6 @@ namespace ErrorCodes
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
extern const int NETWORK_ERROR;
extern const int CORRUPTED_DATA;
extern const int SYSTEM_ERROR;
}
@ -661,51 +662,6 @@ static void sanityChecks(Server & server)
}
}
#if defined(OS_LINUX)
/// Sends notification to systemd, analogous to sd_notify from libsystemd
static void systemdNotify(const std::string_view & command)
{
const char * path = getenv("NOTIFY_SOCKET"); // NOLINT(concurrency-mt-unsafe)
if (path == nullptr)
return; /// not using systemd
int s = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
if (s == -1)
throwFromErrno("Can't create UNIX socket for systemd notify.", ErrorCodes::SYSTEM_ERROR);
SCOPE_EXIT({ close(s); });
const size_t len = strlen(path);
struct sockaddr_un addr;
addr.sun_family = AF_UNIX;
if (len < 2 || len > sizeof(addr.sun_path) - 1)
throw Exception(ErrorCodes::SYSTEM_ERROR, "NOTIFY_SOCKET env var value \"{}\" is wrong.", path);
memcpy(addr.sun_path, path, len + 1); /// write last zero as well.
size_t addrlen = offsetof(struct sockaddr_un, sun_path) + len;
/// '@' meass this is Linux abstract socket, per documentation it must be sun_path[0] must be set to '\0' for it.
if (path[0] == '@')
addr.sun_path[0] = 0;
else if (path[0] == '/')
addrlen += 1; /// non-abstract-addresses should be zero terminated.
else
throw Exception(ErrorCodes::SYSTEM_ERROR, "Wrong UNIX path \"{}\" in NOTIFY_SOCKET env var", path);
const struct sockaddr *sock_addr = reinterpret_cast <const struct sockaddr *>(&addr);
if (sendto(s, command.data(), command.size(), 0, sock_addr, static_cast <socklen_t>(addrlen)) != static_cast <ssize_t>(command.size()))
throw Exception("Failed to notify systemd.", ErrorCodes::SYSTEM_ERROR);
}
#endif
int Server::main(const std::vector<std::string> & /*args*/)
try
{
@ -748,8 +704,8 @@ try
else
{
const String config_path = config().getString("config-file", "config.xml");
const auto config_dir = std::filesystem::path{config_path}.remove_filename();
setenv("OPENSSL_CONF", config_dir.string() + "openssl.conf", true);
const auto config_dir = std::filesystem::path{config_path}.replace_filename("openssl.conf");
setenv("OPENSSL_CONF", config_dir.string(), true);
}
#endif
@ -761,6 +717,8 @@ try
registerDisks(/* global_skip_access_check= */ false);
registerFormats();
registerRemoteFileMetadatas();
registerSchedulerNodes();
registerResourceManagers();
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision());
CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());
@ -1335,6 +1293,11 @@ try
global_context->getDistributedSchedulePool().increaseThreadsCount(new_pool_size);
}
if (config->has("resources"))
{
global_context->getResourceManager()->updateConfiguration(*config);
}
if (!initial_loading)
{
/// We do not load ZooKeeper configuration on the first config loading
@ -1861,6 +1824,9 @@ try
}
#if defined(OS_LINUX)
/// Tell the service manager that service startup is finished.
/// NOTE: the parent clickhouse-watchdog process must do systemdNotify("MAINPID={}\n", child_pid); before
/// the child process notifies 'READY=1'.
systemdNotify("READY=1\n");
#endif

View File

@ -1,4 +1,3 @@
corrosion_import_crate(MANIFEST_PATH Cargo.toml NO_STD)
clickhouse_import_crate(MANIFEST_PATH Cargo.toml)
target_include_directories(_ch_rust_blake3 INTERFACE include)
add_library(ch_rust::blake3 ALIAS _ch_rust_blake3)

View File

@ -1,9 +1,10 @@
function(configure_rustc)
# NOTE: should be macro to export RUST_CXXFLAGS/RUST_CFLAGS for subfolders
macro(configure_rustc)
# NOTE: this can also be done by overriding rustc, but it not trivial with rustup.
set(RUST_CFLAGS "${CMAKE_C_FLAGS}")
set(CXX_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/libcxx/include")
set(RUST_CXXFLAGS "${CMAKE_CXX_FLAGS} -isystem ${CXX_INCLUDE_DIR} ")
set(RUST_CXXFLAGS "${CMAKE_CXX_FLAGS} -isystem ${CXX_INCLUDE_DIR} -nostdinc++")
if (CMAKE_OSX_SYSROOT)
set(RUST_CXXFLAGS "${RUST_CXXFLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
@ -18,8 +19,25 @@ function(configure_rustc)
# NOTE: requires RW access for the source dir
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/.cargo/config.toml.in" "${CMAKE_CURRENT_SOURCE_DIR}/.cargo/config.toml" @ONLY)
endfunction()
endmacro()
configure_rustc()
function(clickhouse_import_crate)
# This is a workaround for Corrosion case sensitive build type matching in
# _generator_add_cargo_targets(), that leads to different paths in
# IMPORTED_LOCATION and real path of the library.
#
# It uses CMAKE_CONFIGURATION_TYPES and $<CONFIG>, so here we preserve the
# case of ${CMAKE_BUILD_TYPE} in ${CMAKE_CONFIGURATION_TYPES}.
if ("${CMAKE_BUILD_TYPE_UC}" STREQUAL "DEBUG")
set(CMAKE_CONFIGURATION_TYPES "${CMAKE_BUILD_TYPE};release")
else()
set(CMAKE_CONFIGURATION_TYPES "${CMAKE_BUILD_TYPE};debug")
endif()
# NOTE: we may use LTO for rust too
corrosion_import_crate(NO_STD ${ARGN})
endfunction()
add_subdirectory (BLAKE3)
add_subdirectory (skim)

View File

@ -14,18 +14,12 @@ if (OS_FREEBSD)
return()
endif()
corrosion_import_crate(MANIFEST_PATH Cargo.toml NO_STD)
clickhouse_import_crate(MANIFEST_PATH Cargo.toml)
set(CXX_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/libcxx/include")
# -Wno-dollar-in-identifier-extension: cxx bridge complies names with '$'
# -Wno-unused-macros: unused CXXBRIDGE1_RUST_STRING
set(CXXBRIDGE_CXXFLAGS "-Wno-dollar-in-identifier-extension -Wno-unused-macros")
set(RUST_CXXFLAGS "${CMAKE_CXX_FLAGS} -isystem ${CXX_INCLUDE_DIR} ${CXXBRIDGE_CXXFLAGS}")
if (CMAKE_OSX_SYSROOT)
set(RUST_CXXFLAGS "${RUST_CXXFLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
elseif(CMAKE_SYSROOT)
set(RUST_CXXFLAGS "${RUST_CXXFLAGS} --sysroot ${CMAKE_SYSROOT}")
endif()
set(RUST_CXXFLAGS "${RUST_CXXFLAGS} ${CXXBRIDGE_CXXFLAGS}")
message(STATUS "RUST_CXXFLAGS (for skim): ${RUST_CXXFLAGS}")
# NOTE: requires RW access for the source dir
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build.rs.in" "${CMAKE_CURRENT_SOURCE_DIR}/build.rs" @ONLY)

1
rust/skim/Cargo.lock generated
View File

@ -9,6 +9,7 @@ dependencies = [
"cxx",
"cxx-build",
"skim",
"term",
]
[[package]]

View File

@ -8,6 +8,7 @@ edition = "2021"
[dependencies]
skim = "0.10.2"
cxx = "1.0.83"
term = "0.7.0"
[build-dependencies]
cxx-build = "1.0.83"

View File

@ -87,4 +87,4 @@ private:
} // namespace cxxbridge1
} // namespace rust
::rust::String skim(::std::vector<::std::string> const &words) noexcept;
::rust::String skim(::std::vector<::std::string> const &words);

View File

@ -1,10 +1,11 @@
use skim::prelude::*;
use term::terminfo::TermInfo;
use cxx::{CxxString, CxxVector};
#[cxx::bridge]
mod ffi {
extern "Rust" {
fn skim(words: &CxxVector<CxxString>) -> String;
fn skim(words: &CxxVector<CxxString>) -> Result<String>;
}
}
@ -17,8 +18,12 @@ impl SkimItem for Item {
}
}
fn skim(words: &CxxVector<CxxString>) -> String {
// TODO: configure colors
fn skim(words: &CxxVector<CxxString>) -> Result<String, String> {
// Let's check is terminal available. To avoid panic.
if let Err(err) = TermInfo::from_env() {
return Err(format!("{}", err));
}
let options = SkimOptionsBuilder::default()
.height(Some("30%"))
.tac(true)
@ -35,15 +40,15 @@ fn skim(words: &CxxVector<CxxString>) -> String {
let output = Skim::run_with(&options, Some(rx));
if output.is_none() {
return "".to_string();
return Err("skim return nothing".to_string());
}
let output = output.unwrap();
if output.is_abort {
return "".to_string();
return Ok("".to_string());
}
if output.selected_items.is_empty() {
return "".to_string();
return Err("No items had been selected".to_string());
}
return output.selected_items[0].output().to_string();
return Ok(output.selected_items[0].output().to_string());
}

View File

@ -86,6 +86,49 @@ void SettingsConstraints::merge(const SettingsConstraints & other)
}
void SettingsConstraints::check(const Settings & current_settings, const SettingsProfileElements & profile_elements) const
{
for (const auto & element : profile_elements)
{
if (SettingsProfileElements::isAllowBackupSetting(element.setting_name))
continue;
if (!element.value.isNull())
{
SettingChange value(element.setting_name, element.value);
check(current_settings, value);
}
if (!element.min_value.isNull())
{
SettingChange value(element.setting_name, element.min_value);
check(current_settings, value);
}
if (!element.max_value.isNull())
{
SettingChange value(element.setting_name, element.max_value);
check(current_settings, value);
}
SettingConstraintWritability new_value = SettingConstraintWritability::WRITABLE;
SettingConstraintWritability old_value = SettingConstraintWritability::WRITABLE;
if (element.writability)
new_value = *element.writability;
auto it = constraints.find(element.setting_name);
if (it != constraints.end())
old_value = it->second.writability;
if (new_value != old_value)
{
if (old_value == SettingConstraintWritability::CONST)
throw Exception("Setting " + element.setting_name + " should not be changed", ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
}
}
}
void SettingsConstraints::check(const Settings & current_settings, const SettingChange & change) const
{
checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION);

View File

@ -73,6 +73,7 @@ public:
void merge(const SettingsConstraints & other);
/// Checks whether `change` violates these constraints and throws an exception if so.
void check(const Settings & current_settings, const SettingsProfileElements & profile_elements) const;
void check(const Settings & current_settings, const SettingChange & change) const;
void check(const Settings & current_settings, const SettingsChanges & changes) const;
void check(const Settings & current_settings, SettingsChanges & changes) const;

View File

@ -75,6 +75,10 @@ void SettingsProfileElement::init(const ASTSettingsProfileElement & ast, const A
}
}
bool SettingsProfileElement::isConstraint() const
{
return this->writability || !this->min_value.isNull() || !this->max_value.isNull();
}
std::shared_ptr<ASTSettingsProfileElement> SettingsProfileElement::toAST() const
{
@ -213,7 +217,7 @@ SettingsConstraints SettingsProfileElements::toSettingsConstraints(const AccessC
{
SettingsConstraints res{access_control};
for (const auto & elem : *this)
if (!elem.setting_name.empty() && elem.setting_name != ALLOW_BACKUP_SETTING_NAME)
if (!elem.setting_name.empty() && elem.isConstraint() && elem.setting_name != ALLOW_BACKUP_SETTING_NAME)
res.set(
elem.setting_name,
elem.min_value,
@ -248,4 +252,9 @@ bool SettingsProfileElements::isBackupAllowed() const
return true;
}
bool SettingsProfileElements::isAllowBackupSetting(const String & setting_name)
{
return setting_name == ALLOW_BACKUP_SETTING_NAME;
}
}

View File

@ -44,6 +44,8 @@ struct SettingsProfileElement
std::shared_ptr<ASTSettingsProfileElement> toAST() const;
std::shared_ptr<ASTSettingsProfileElement> toASTWithNames(const AccessControl & access_control) const;
bool isConstraint() const;
private:
void init(const ASTSettingsProfileElement & ast, const AccessControl * access_control);
};
@ -71,6 +73,8 @@ public:
std::vector<UUID> toProfileIDs() const;
bool isBackupAllowed() const;
static bool isAllowBackupSetting(const String & setting_name);
};
}

View File

@ -0,0 +1,647 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <Common/HashTable/HashMap.h>
#include <Common/SymbolIndex.h>
#include <Common/ArenaAllocator.h>
#include <Core/Settings.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <filesystem>
namespace DB
{
namespace ErrorCodes
{
extern const int FUNCTION_NOT_ALLOWED;
extern const int NOT_IMPLEMENTED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
struct AggregateFunctionFlameGraphTree
{
struct ListNode;
struct TreeNode
{
TreeNode * parent = nullptr;
ListNode * children = nullptr;
UInt64 ptr = 0;
size_t allocated = 0;
};
struct ListNode
{
ListNode * next = nullptr;
TreeNode * child = nullptr;
};
TreeNode root;
static ListNode * createChild(TreeNode * parent, UInt64 ptr, Arena * arena)
{
ListNode * list_node = reinterpret_cast<ListNode *>(arena->alloc(sizeof(ListNode)));
TreeNode * tree_node = reinterpret_cast<TreeNode *>(arena->alloc(sizeof(TreeNode)));
list_node->child = tree_node;
list_node->next = nullptr;
tree_node->parent =parent;
tree_node->children = nullptr;
tree_node->ptr = ptr;
tree_node->allocated = 0;
return list_node;
}
TreeNode * find(const UInt64 * stack, size_t stack_size, Arena * arena)
{
TreeNode * node = &root;
for (size_t i = 0; i < stack_size; ++i)
{
UInt64 ptr = stack[i];
if (ptr == 0)
break;
if (!node->children)
{
node->children = createChild(node, ptr, arena);
node = node->children->child;
}
else
{
ListNode * list = node->children;
while (list->child->ptr != ptr && list->next)
list = list->next;
if (list->child->ptr != ptr)
{
list->next = createChild(node, ptr, arena);
list = list->next;
}
node = list->child;
}
}
return node;
}
static void append(DB::PaddedPODArray<UInt64> & values, DB::PaddedPODArray<UInt64> & offsets, std::vector<UInt64> & frame)
{
UInt64 prev = offsets.empty() ? 0 : offsets.back();
offsets.push_back(prev + frame.size());
for (UInt64 val : frame)
values.push_back(val);
}
struct Trace
{
using Frames = std::vector<UInt64>;
Frames frames;
/// The total number of bytes allocated for traces with the same prefix.
size_t allocated_total = 0;
/// This counter is relevant in case we want to filter some traces with small amount of bytes.
/// It shows the total number of bytes for *filtered* traces with the same prefix.
/// This is the value which is used in flamegraph.
size_t allocated_self = 0;
};
using Traces = std::vector<Trace>;
Traces dump(size_t max_depth, size_t min_bytes) const
{
Traces traces;
Trace::Frames frames;
std::vector<size_t> allocated_total;
std::vector<size_t> allocated_self;
std::vector<ListNode *> nodes;
nodes.push_back(root.children);
allocated_total.push_back(root.allocated);
allocated_self.push_back(root.allocated);
while (!nodes.empty())
{
if (nodes.back() == nullptr)
{
traces.push_back({frames, allocated_total.back(), allocated_self.back()});
nodes.pop_back();
allocated_total.pop_back();
allocated_self.pop_back();
/// We don't have root's frame so framers are empty in the end.
if (!frames.empty())
frames.pop_back();
continue;
}
TreeNode * current = nodes.back()->child;
nodes.back() = nodes.back()->next;
bool enough_bytes = current->allocated >= min_bytes;
bool enough_depth = max_depth == 0 || nodes.size() < max_depth;
if (enough_bytes)
{
frames.push_back(current->ptr);
allocated_self.back() -= current->allocated;
if (enough_depth)
{
allocated_total.push_back(current->allocated);
allocated_self.push_back(current->allocated);
nodes.push_back(current->children);
}
else
{
traces.push_back({frames, current->allocated, current->allocated});
frames.pop_back();
}
}
}
return traces;
}
};
static void insertData(DB::PaddedPODArray<UInt8> & chars, DB::PaddedPODArray<UInt64> & offsets, const char * pos, size_t length)
{
const size_t old_size = chars.size();
const size_t new_size = old_size + length + 1;
chars.resize(new_size);
if (length)
memcpy(chars.data() + old_size, pos, length);
chars[old_size + length] = 0;
offsets.push_back(new_size);
}
/// Split str by line feed and write as separate row to ColumnString.
static void fillColumn(DB::PaddedPODArray<UInt8> & chars, DB::PaddedPODArray<UInt64> & offsets, const std::string & str)
{
size_t start = 0;
size_t end = 0;
size_t size = str.size();
while (end < size)
{
if (str[end] == '\n')
{
insertData(chars, offsets, str.data() + start, end - start);
start = end + 1;
}
++end;
}
if (start < end)
insertData(chars, offsets, str.data() + start, end - start);
}
void dumpFlameGraph(
const AggregateFunctionFlameGraphTree::Traces & traces,
DB::PaddedPODArray<UInt8> & chars,
DB::PaddedPODArray<UInt64> & offsets)
{
DB::WriteBufferFromOwnString out;
std::unordered_map<uintptr_t, size_t> mapping;
#if defined(__ELF__) && !defined(OS_FREEBSD)
auto symbol_index_ptr = DB::SymbolIndex::instance();
const DB::SymbolIndex & symbol_index = *symbol_index_ptr;
#endif
for (const auto & trace : traces)
{
if (trace.allocated_self == 0)
continue;
for (size_t i = 0; i < trace.frames.size(); ++i)
{
if (i)
out << ";";
const void * ptr = reinterpret_cast<const void *>(trace.frames[i]);
#if defined(__ELF__) && !defined(OS_FREEBSD)
if (const auto * symbol = symbol_index.findSymbol(ptr))
writeString(demangle(symbol->name), out);
else
DB::writePointerHex(ptr, out);
#else
DB::writePointerHex(ptr, out);
#endif
}
out << ' ' << trace.allocated_self << "\n";
}
fillColumn(chars, offsets, out.str());
}
struct AggregateFunctionFlameGraphData
{
struct Entry
{
AggregateFunctionFlameGraphTree::TreeNode * trace;
UInt64 size;
Entry * next = nullptr;
};
struct Pair
{
Entry * allocation = nullptr;
Entry * deallocation = nullptr;
};
using Entries = HashMap<UInt64, Pair>;
AggregateFunctionFlameGraphTree tree;
Entries entries;
Entry * free_list = nullptr;
Entry * alloc(Arena * arena)
{
if (free_list)
{
auto * res = free_list;
free_list = free_list->next;
return res;
}
return reinterpret_cast<Entry *>(arena->alloc(sizeof(Entry)));
}
void release(Entry * entry)
{
entry->next = free_list;
free_list = entry;
}
static void track(Entry * allocation)
{
auto * node = allocation->trace;
while (node)
{
node->allocated += allocation->size;
node = node->parent;
}
}
static void untrack(Entry * allocation)
{
auto * node = allocation->trace;
while (node)
{
node->allocated -= allocation->size;
node = node->parent;
}
}
static Entry * tryFindMatchAndRemove(Entry *& list, UInt64 size)
{
if (!list)
return nullptr;
if (list->size == size)
{
Entry * entry = list;
list = list->next;
return entry;
}
else
{
Entry * parent = list;
while (parent->next && parent->next->size != size)
parent = parent->next;
if (parent->next && parent->next->size == size)
{
Entry * entry = parent->next;
parent->next = entry->next;
return entry;
}
return nullptr;
}
}
void add(UInt64 ptr, Int64 size, const UInt64 * stack, size_t stack_size, Arena * arena)
{
/// In case if argument is nullptr, only track allocations.
if (ptr == 0)
{
if (size > 0)
{
auto * node = tree.find(stack, stack_size, arena);
Entry entry{.trace = node, .size = UInt64(size)};
track(&entry);
}
return;
}
auto & place = entries[ptr];
if (size > 0)
{
if (auto * deallocation = tryFindMatchAndRemove(place.deallocation, size))
{
release(deallocation);
}
else
{
auto * node = tree.find(stack, stack_size, arena);
auto * allocation = alloc(arena);
allocation->size = UInt64(size);
allocation->trace = node;
track(allocation);
allocation->next = place.allocation;
place.allocation = allocation;
}
}
else if (size < 0)
{
UInt64 abs_size = -size;
if (auto * allocation = tryFindMatchAndRemove(place.allocation, abs_size))
{
untrack(allocation);
release(allocation);
}
else
{
auto * deallocation = alloc(arena);
deallocation->size = abs_size;
deallocation->next = place.deallocation;
place.deallocation = deallocation;
}
}
}
void merge(const AggregateFunctionFlameGraphTree & other_tree, Arena * arena)
{
AggregateFunctionFlameGraphTree::Trace::Frames frames;
std::vector<AggregateFunctionFlameGraphTree::ListNode *> nodes;
nodes.push_back(other_tree.root.children);
while (!nodes.empty())
{
if (nodes.back() == nullptr)
{
nodes.pop_back();
/// We don't have root's frame so framers are empty in the end.
if (!frames.empty())
frames.pop_back();
continue;
}
AggregateFunctionFlameGraphTree::TreeNode * current = nodes.back()->child;
nodes.back() = nodes.back()->next;
frames.push_back(current->ptr);
if (current->children)
nodes.push_back(current->children);
else
{
if (current->allocated)
add(0, current->allocated, frames.data(), frames.size(), arena);
frames.pop_back();
}
}
}
void merge(const AggregateFunctionFlameGraphData & other, Arena * arena)
{
AggregateFunctionFlameGraphTree::Trace::Frames frames;
for (const auto & entry : other.entries)
{
for (auto * allocation = entry.value.second.allocation; allocation; allocation = allocation->next)
{
frames.clear();
const auto * node = allocation->trace;
while (node->ptr)
{
frames.push_back(node->ptr);
node = node->parent;
}
std::reverse(frames.begin(), frames.end());
add(entry.value.first, allocation->size, frames.data(), frames.size(), arena);
untrack(allocation);
}
for (auto * deallocation = entry.value.second.deallocation; deallocation; deallocation = deallocation->next)
{
add(entry.value.first, -Int64(deallocation->size), nullptr, 0, arena);
}
}
merge(other.tree, arena);
}
void dumpFlameGraph(
DB::PaddedPODArray<UInt8> & chars,
DB::PaddedPODArray<UInt64> & offsets,
size_t max_depth, size_t min_bytes) const
{
DB::dumpFlameGraph(tree.dump(max_depth, min_bytes), chars, offsets);
}
};
/// Aggregate function which builds a flamegraph using the list of stacktraces.
/// The output is an array of strings which can be used by flamegraph.pl util.
/// See https://github.com/brendangregg/FlameGraph
///
/// Syntax: flameGraph(traces, [size = 1], [ptr = 0])
/// - trace : Array(UInt64), a stacktrace
/// - size : Int64, an allocation size (for memory profiling)
/// - ptr : UInt64, an allocation address
/// In case if ptr != 0, a flameGraph will map allocations (size > 0) and deallocations (size < 0) with the same size and ptr.
/// Only allocations which were not freed are shown. Not mapped deallocations are ignored.
///
/// Usage:
///
/// * Build a flamegraph based on CPU query profiler
/// set query_profiler_cpu_time_period_ns=10000000;
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(arrayReverse(trace))) from system.trace_log where trace_type = 'CPU' and query_id = 'xxx'"
/// | ~/dev/FlameGraph/flamegraph.pl > flame_cpu.svg
///
/// * Build a flamegraph based on memory query profiler, showing all allocations
/// set memory_profiler_sample_probability=1, max_untracked_memory=1;
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(trace, size)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'"
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem.svg
///
/// * Build a flamegraph based on memory query profiler, showing allocations which were not deallocated in query context
/// set memory_profiler_sample_probability=1, max_untracked_memory=1, use_uncompressed_cache=1, merge_tree_max_rows_to_use_cache=100000000000, merge_tree_max_bytes_to_use_cache=1000000000000;
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(trace, size, ptr)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'"
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_untracked.svg
///
/// * Build a flamegraph based on memory query profiler, showing active allocations at the fixed point of time
/// set memory_profiler_sample_probability=1, max_untracked_memory=1;
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
/// 1. Memory usage per second
/// select event_time, m, formatReadableSize(max(s) as m) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample') group by event_time order by event_time;
/// 2. Find a time point with maximal memory usage
/// select argMax(event_time, s), max(s) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample');
/// 3. Fix active allocations at fixed point of time
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(trace, size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time <= 'yyy' order by event_time)"
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_pos.svg
/// 4. Find deallocations at fixed point of time
/// clickhouse client --allow_introspection_functions=1
/// -q "select arrayJoin(flameGraph(trace, -size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time > 'yyy' order by event_time desc)"
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_neg.svg
class AggregateFunctionFlameGraph final : public IAggregateFunctionDataHelper<AggregateFunctionFlameGraphData, AggregateFunctionFlameGraph>
{
public:
explicit AggregateFunctionFlameGraph(const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<AggregateFunctionFlameGraphData, AggregateFunctionFlameGraph>(argument_types_, {})
{}
String getName() const override { return "flameGraph"; }
DataTypePtr getReturnType() const override
{
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
}
bool allocatesMemoryInArena() const override { return true; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
const auto * trace = typeid_cast<const ColumnArray *>(columns[0]);
const auto & trace_offsets = trace->getOffsets();
const auto & trace_values = typeid_cast<const ColumnUInt64 *>(&trace->getData())->getData();
UInt64 prev_offset = 0;
if (row_num)
prev_offset = trace_offsets[row_num - 1];
UInt64 trace_size = trace_offsets[row_num] - prev_offset;
Int64 allocated = 1;
if (argument_types.size() >= 2)
{
const auto & sizes = typeid_cast<const ColumnInt64 *>(columns[1])->getData();
allocated = sizes[row_num];
}
UInt64 ptr = 0;
if (argument_types.size() >= 3)
{
const auto & ptrs = typeid_cast<const ColumnUInt64 *>(columns[2])->getData();
ptr = ptrs[row_num];
}
this->data(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena);
}
void addManyDefaults(
AggregateDataPtr __restrict /*place*/,
const IColumn ** /*columns*/,
size_t /*length*/,
Arena * /*arena*/) const override
{
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
this->data(place).merge(this->data(rhs), arena);
}
void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional<size_t> /* version */) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Serialization for function flameGraph is not implemented.");
}
void deserialize(AggregateDataPtr __restrict, ReadBuffer &, std::optional<size_t> /* version */, Arena *) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Deserialization for function flameGraph is not implemented.");
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & array = assert_cast<ColumnArray &>(to);
auto & str = assert_cast<ColumnString &>(array.getData());
this->data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0);
array.getOffsets().push_back(str.size());
}
};
static void check(const std::string & name, const DataTypes & argument_types, const Array & params)
{
assertNoParameters(name, params);
if (argument_types.empty() || argument_types.size() > 3)
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Aggregate function {} requires 1 to 3 arguments : trace, [size = 1], [ptr = 0]",
name);
auto ptr_type = std::make_shared<DataTypeUInt64>();
auto trace_type = std::make_shared<DataTypeArray>(ptr_type);
auto size_type = std::make_shared<DataTypeInt64>();
if (!argument_types[0]->equals(*trace_type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"First argument (trace) for function {} must be Array(UInt64), but it has type {}",
name, argument_types[0]->getName());
if (argument_types.size() >= 2 && !argument_types[1]->equals(*size_type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument (size) for function {} must be Int64, but it has type {}",
name, argument_types[1]->getName());
if (argument_types.size() >= 3 && !argument_types[2]->equals(*ptr_type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Third argument (ptr) for function {} must be UInt64, but it has type {}",
name, argument_types[2]->getName());
}
AggregateFunctionPtr createAggregateFunctionFlameGraph(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings * settings)
{
if (!settings->allow_introspection_functions)
throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED,
"Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0");
check(name, argument_types, params);
return std::make_shared<AggregateFunctionFlameGraph>(argument_types);
}
void registerAggregateFunctionFlameGraph(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true };
factory.registerFunction("flameGraph", { createAggregateFunctionFlameGraph, properties });
}
}

View File

@ -73,6 +73,7 @@ void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &);
class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -158,6 +159,7 @@ void registerAggregateFunctions()
registerAggregateFunctionExponentialMovingAverage(factory);
registerAggregateFunctionSparkbar(factory);
registerAggregateFunctionAnalysisOfVariance(factory);
registerAggregateFunctionFlameGraph(factory);
registerWindowFunctions(factory);
}

View File

@ -0,0 +1,197 @@
#include <Analyzer/Passes/IfTransformStringsToEnumPass.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/IDataType.h>
#include <Functions/FunctionFactory.h>
namespace DB
{
namespace
{
/// We place strings in ascending order here under the assumption it could speed up String to Enum conversion.
template <typename EnumType>
auto getDataEnumType(const std::set<std::string> & string_values)
{
using EnumValues = typename EnumType::Values;
EnumValues enum_values;
enum_values.reserve(string_values.size());
size_t number = 1;
for (const auto & value : string_values)
enum_values.emplace_back(value, number++);
return std::make_shared<EnumType>(std::move(enum_values));
}
DataTypePtr getEnumType(const std::set<std::string> & string_values)
{
if (string_values.size() >= 255)
return getDataEnumType<DataTypeEnum16>(string_values);
else
return getDataEnumType<DataTypeEnum8>(string_values);
}
QueryTreeNodePtr createCastFunction(QueryTreeNodePtr from, DataTypePtr result_type, ContextPtr context)
{
auto enum_literal = std::make_shared<ConstantValue>(result_type->getName(), std::make_shared<DataTypeString>());
auto enum_literal_node = std::make_shared<ConstantNode>(std::move(enum_literal));
auto cast_function = FunctionFactory::instance().get("_CAST", std::move(context));
QueryTreeNodes arguments{std::move(from), std::move(enum_literal_node)};
auto function_node = std::make_shared<FunctionNode>("_CAST");
function_node->resolveAsFunction(std::move(cast_function), std::move(result_type));
function_node->getArguments().getNodes() = std::move(arguments);
return function_node;
}
/// if(arg1, arg2, arg3) will be transformed to if(arg1, _CAST(arg2, Enum...), _CAST(arg3, Enum...))
/// where Enum is generated based on the possible values stored in string_values
void changeIfArguments(
QueryTreeNodePtr & first, QueryTreeNodePtr & second, const std::set<std::string> & string_values, const ContextPtr & context)
{
auto result_type = getEnumType(string_values);
first = createCastFunction(first, result_type, context);
second = createCastFunction(second, result_type, context);
}
/// transform(value, array_from, array_to, default_value) will be transformed to transform(value, array_from, _CAST(array_to, Array(Enum...)), _CAST(default_value, Enum...))
/// where Enum is generated based on the possible values stored in string_values
void changeTransformArguments(
QueryTreeNodePtr & array_to,
QueryTreeNodePtr & default_value,
const std::set<std::string> & string_values,
const ContextPtr & context)
{
auto result_type = getEnumType(string_values);
array_to = createCastFunction(array_to, std::make_shared<DataTypeArray>(result_type), context);
default_value = createCastFunction(default_value, std::move(result_type), context);
}
void wrapIntoToString(FunctionNode & function_node, QueryTreeNodePtr arg, ContextPtr context)
{
assert(isString(function_node.getResultType()));
auto to_string_function = FunctionFactory::instance().get("toString", std::move(context));
QueryTreeNodes arguments{std::move(arg)};
function_node.resolveAsFunction(std::move(to_string_function), std::make_shared<DataTypeString>());
function_node.getArguments().getNodes() = std::move(arguments);
}
class ConvertStringsToEnumVisitor : public InDepthQueryTreeVisitor<ConvertStringsToEnumVisitor>
{
public:
explicit ConvertStringsToEnumVisitor(ContextPtr context_)
: context(std::move(context_))
{
}
void visitImpl(QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (!function_node)
return;
/// to preserve return type (String) of the current function_node, we wrap the newly
/// generated function nodes into toString
std::string_view function_name = function_node->getFunctionName();
if (function_name == "if")
{
if (function_node->getArguments().getNodes().size() != 3)
return;
auto modified_if_node = function_node->clone();
auto & argument_nodes = modified_if_node->as<FunctionNode>()->getArguments().getNodes();
const auto * first_literal = argument_nodes[1]->as<ConstantNode>();
const auto * second_literal = argument_nodes[2]->as<ConstantNode>();
if (!first_literal || !second_literal)
return;
if (!isString(first_literal->getResultType()) || !isString(second_literal->getResultType()))
return;
std::set<std::string> string_values;
string_values.insert(first_literal->getValue().get<std::string>());
string_values.insert(second_literal->getValue().get<std::string>());
changeIfArguments(argument_nodes[1], argument_nodes[2], string_values, context);
wrapIntoToString(*function_node, std::move(modified_if_node), context);
return;
}
if (function_name == "transform")
{
if (function_node->getArguments().getNodes().size() != 4)
return;
auto modified_transform_node = function_node->clone();
auto & argument_nodes = modified_transform_node->as<FunctionNode>()->getArguments().getNodes();
if (!isString(function_node->getResultType()))
return;
const auto * literal_to = argument_nodes[2]->as<ConstantNode>();
const auto * literal_default = argument_nodes[3]->as<ConstantNode>();
if (!literal_to || !literal_default)
return;
if (!isArray(literal_to->getResultType()) || !isString(literal_default->getResultType()))
return;
auto array_to = literal_to->getValue().get<Array>();
if (array_to.empty())
return;
if (!std::all_of(
array_to.begin(),
array_to.end(),
[](const auto & field) { return field.getType() == Field::Types::Which::String; }))
return;
/// collect possible string values
std::set<std::string> string_values;
for (const auto & value : array_to)
string_values.insert(value.get<std::string>());
string_values.insert(literal_default->getValue().get<std::string>());
changeTransformArguments(argument_nodes[2], argument_nodes[3], string_values, context);
wrapIntoToString(*function_node, std::move(modified_transform_node), context);
return;
}
}
private:
ContextPtr context;
};
}
void IfTransformStringsToEnumPass::run(QueryTreeNodePtr query, ContextPtr context)
{
ConvertStringsToEnumVisitor visitor(context);
visitor.visit(query);
}
}

View File

@ -0,0 +1,39 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/**
* This pass replaces string-type arguments in If and Transform to enum.
*
* E.g.
* -------------------------------
* SELECT if(number > 5, 'a', 'b')
* FROM system.numbers;
*
* will be transformed into
*
* SELECT if(number > 5, _CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'), _CAST('b', 'Enum8(\'a\' = 1, \'b\' = 2)'))
* FROM system.numbers;
* -------------------------------
* SELECT transform(number, [2, 4], ['a', 'b'], 'c') FROM system.numbers;
*
* will be transformed into
*
* SELECT transform(number, [2, 4], _CAST(['a', 'b'], 'Array(Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3)'), _CAST('c', 'Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3)'))
* FROM system.numbers;
* -------------------------------
*/
class IfTransformStringsToEnumPass final : public IQueryTreePass
{
public:
String getName() override { return "IfTransformStringsToEnumPass"; }
String getDescription() override { return "Replaces string-type arguments in If and Transform to enum"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -14,6 +14,7 @@
#include <Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.h>
#include <Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.h>
#include <Analyzer/Passes/FuseFunctionsPass.h>
#include <Analyzer/Passes/IfTransformStringsToEnumPass.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
@ -77,7 +78,6 @@ public:
* TODO: Support setting optimize_duplicate_order_by_and_distinct.
* TODO: Support setting optimize_redundant_functions_in_order_by.
* TODO: Support setting optimize_monotonous_functions_in_order_by.
* TODO: Support setting optimize_if_transform_strings_to_enum.
* TODO: Support settings.optimize_or_like_chain.
* TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column).
*/
@ -193,6 +193,9 @@ void addQueryTreePasses(QueryTreePassManager & manager)
if (settings.optimize_syntax_fuse_functions)
manager.addPass(std::make_unique<FuseFunctionsPass>());
if (settings.optimize_if_transform_strings_to_enum)
manager.addPass(std::make_unique<IfTransformStringsToEnumPass>());
}
}

View File

@ -91,7 +91,8 @@ bool SortNode::isEqualImpl(const IQueryTreeNode & rhs) const
void SortNode::updateTreeHashImpl(HashState & hash_state) const
{
hash_state.update(sort_direction);
hash_state.update(nulls_sort_direction);
/// use some determined value if `nulls_sort_direction` is `nullopt`
hash_state.update(nulls_sort_direction.value_or(sort_direction));
hash_state.update(with_fill);
if (collator)

View File

@ -129,7 +129,7 @@ BackupWriterS3::BackupWriterS3(
, request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings)
, log(&Poco::Logger::get("BackupWriterS3"))
{
request_settings.updateFromSettingsIfEmpty(context_->getSettingsRef());
request_settings.updateFromSettings(context_->getSettingsRef());
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
}
@ -179,15 +179,6 @@ void BackupWriterS3::copyObjectImpl(
}
Aws::S3::Model::HeadObjectOutcome BackupWriterS3::requestObjectHeadData(const std::string & bucket_from, const std::string & key) const
{
Aws::S3::Model::HeadObjectRequest request;
request.SetBucket(bucket_from);
request.SetKey(key);
return client->HeadObject(request);
}
void BackupWriterS3::copyObjectMultipartImpl(
const String & src_bucket,
const String & src_key,
@ -219,20 +210,21 @@ void BackupWriterS3::copyObjectMultipartImpl(
std::vector<String> part_tags;
size_t position = 0;
size_t upload_part_size = request_settings.min_upload_part_size;
const auto & settings = request_settings.getUploadSettings();
size_t upload_part_size = settings.min_upload_part_size;
for (size_t part_number = 1; position < size; ++part_number)
{
/// Check that part number is not too big.
if (part_number > request_settings.max_part_number)
if (part_number > settings.max_part_number)
{
throw Exception(
ErrorCodes::INVALID_CONFIG_PARAMETER,
"Part number exceeded {} while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, "
"upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, max_single_operation_copy_size = {}",
request_settings.max_part_number, size, request_settings.min_upload_part_size, request_settings.max_upload_part_size,
request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold,
request_settings.max_single_operation_copy_size);
settings.max_part_number, size, settings.min_upload_part_size, settings.max_upload_part_size,
settings.upload_part_size_multiply_factor, settings.upload_part_size_multiply_parts_count_threshold,
settings.max_single_operation_copy_size);
}
size_t next_position = std::min(position + upload_part_size, size);
@ -265,10 +257,10 @@ void BackupWriterS3::copyObjectMultipartImpl(
position = next_position;
/// Maybe increase `upload_part_size` (we need to increase it sometimes to keep `part_number` less or equal than `max_part_number`).
if (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0)
if (part_number % settings.upload_part_size_multiply_parts_count_threshold == 0)
{
upload_part_size *= request_settings.upload_part_size_multiply_factor;
upload_part_size = std::min(upload_part_size, request_settings.max_upload_part_size);
upload_part_size *= settings.upload_part_size_multiply_factor;
upload_part_size = std::min(upload_part_size, settings.max_upload_part_size);
}
}
@ -310,8 +302,8 @@ void BackupWriterS3::copyFileNative(DiskPtr from_disk, const String & file_name_
std::string source_bucket = object_storage->getObjectsNamespace();
auto file_path = fs::path(s3_uri.key) / file_name_to;
auto head = requestObjectHeadData(source_bucket, objects[0].absolute_path).GetResult();
if (static_cast<size_t>(head.GetContentLength()) < request_settings.max_single_operation_copy_size)
auto head = S3::headObject(*client, source_bucket, objects[0].absolute_path).GetResult();
if (static_cast<size_t>(head.GetContentLength()) < request_settings.getUploadSettings().max_single_operation_copy_size)
{
copyObjectImpl(
source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, head);

View File

@ -62,8 +62,6 @@ public:
void copyFileNative(DiskPtr from_disk, const String & file_name_from, const String & file_name_to) override;
private:
Aws::S3::Model::HeadObjectOutcome requestObjectHeadData(const std::string & bucket_from, const std::string & key) const;
void copyObjectImpl(
const String & src_bucket,
const String & src_key,

View File

@ -181,6 +181,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
/// For ON CLUSTER queries we will need to change some settings.
/// For ASYNC queries we have to clone the context anyway.
context_in_use = mutable_context = Context::createCopy(context);
mutable_context->makeQueryContext();
}
if (backup_settings.async)
@ -400,6 +401,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
/// For ON CLUSTER queries we will need to change some settings.
/// For ASYNC queries we have to clone the context anyway.
context_in_use = Context::createCopy(context);
context_in_use->makeQueryContext();
}
if (restore_settings.async)

View File

@ -346,7 +346,7 @@ void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name
res_table_info.has_data = backup->hasFiles(data_path_in_backup);
res_table_info.data_path_in_backup = data_path_in_backup;
tables_dependencies.addDependencies(table_name, getDependenciesFromCreateQuery(context->getGlobalContext(), table_name, create_table_query));
tables_dependencies.addDependencies(table_name, getDependenciesFromCreateQuery(context, table_name, create_table_query));
if (partitions)
{
@ -674,6 +674,7 @@ void RestorerFromBackup::removeUnresolvedDependencies()
void RestorerFromBackup::createTables()
{
/// We need to create tables considering their dependencies.
tables_dependencies.log();
auto tables_to_create = tables_dependencies.getTablesSortedByDependency();
for (const auto & table_id : tables_to_create)
{

View File

@ -88,6 +88,7 @@ add_headers_and_sources(clickhouse_common_io Common)
add_headers_and_sources(clickhouse_common_io Common/HashTable)
add_headers_and_sources(clickhouse_common_io IO)
add_headers_and_sources(clickhouse_common_io IO/Archives)
add_headers_and_sources(clickhouse_common_io IO/Resource)
add_headers_and_sources(clickhouse_common_io IO/S3)
list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp)
@ -106,6 +107,7 @@ if (TARGET ch_contrib::nats_io)
endif()
add_headers_and_sources(dbms Storages/MeiliSearch)
add_headers_and_sources(dbms Storages/NamedCollections)
if (TARGET ch_contrib::amqp_cpp)
add_headers_and_sources(dbms Storages/RabbitMQ)

View File

@ -0,0 +1,16 @@
#pragma once
#include <cstddef>
/// This is a structure which is returned by MemoryTracker.
/// Methods onAlloc/onFree should be called after actual memory allocation if it succeed.
/// For now, it will only collect allocation trace with sample_probability.
struct AllocationTrace
{
AllocationTrace() = default;
explicit AllocationTrace(double sample_probability_);
void onAlloc(void * ptr, size_t size) const;
void onFree(void * ptr, size_t size) const;
double sample_probability = 0;
};

View File

@ -92,8 +92,10 @@ public:
void * alloc(size_t size, size_t alignment = 0)
{
checkSize(size);
CurrentMemoryTracker::alloc(size);
return allocNoTrack(size, alignment);
auto trace = CurrentMemoryTracker::alloc(size);
void * ptr = allocNoTrack(size, alignment);
trace.onAlloc(ptr, size);
return ptr;
}
/// Free memory range.
@ -103,7 +105,8 @@ public:
{
checkSize(size);
freeNoTrack(buf, size);
CurrentMemoryTracker::free(size);
auto trace = CurrentMemoryTracker::free(size);
trace.onFree(buf, size);
}
catch (...)
{
@ -129,13 +132,16 @@ public:
&& alignment <= MALLOC_MIN_ALIGNMENT)
{
/// Resize malloc'd memory region with no special alignment requirement.
CurrentMemoryTracker::realloc(old_size, new_size);
auto trace = CurrentMemoryTracker::realloc(old_size, new_size);
trace.onFree(buf, old_size);
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
buf = new_buf;
trace.onAlloc(buf, new_size);
if constexpr (clear_memory)
if (new_size > old_size)
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
@ -143,7 +149,8 @@ public:
else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
{
/// Resize mmap'd memory region.
CurrentMemoryTracker::realloc(old_size, new_size);
auto trace = CurrentMemoryTracker::realloc(old_size, new_size);
trace.onFree(buf, old_size);
// On apple and freebsd self-implemented mremap used (common/mremap.h)
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
@ -152,14 +159,17 @@ public:
DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP);
trace.onAlloc(buf, new_size);
/// No need for zero-fill, because mmap guarantees it.
}
else if (new_size < MMAP_THRESHOLD)
{
/// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once.
CurrentMemoryTracker::realloc(old_size, new_size);
auto trace = CurrentMemoryTracker::realloc(old_size, new_size);
trace.onFree(buf, old_size);
void * new_buf = allocNoTrack(new_size, alignment);
trace.onAlloc(new_buf, new_size);
memcpy(new_buf, buf, std::min(old_size, new_size));
freeNoTrack(buf, old_size);
buf = new_buf;

View File

@ -30,21 +30,24 @@ struct AllocatorWithMemoryTracking
throw std::bad_alloc();
size_t bytes = n * sizeof(T);
CurrentMemoryTracker::alloc(bytes);
auto trace = CurrentMemoryTracker::alloc(bytes);
T * p = static_cast<T *>(malloc(bytes));
if (!p)
throw std::bad_alloc();
trace.onAlloc(p, bytes);
return p;
}
void deallocate(T * p, size_t n) noexcept
{
free(p);
size_t bytes = n * sizeof(T);
CurrentMemoryTracker::free(bytes);
free(p);
auto trace = CurrentMemoryTracker::free(bytes);
trace.onFree(p, bytes);
}
};

View File

@ -0,0 +1,76 @@
#pragma once
#include <Common/hex.h>
namespace DB
{
static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size = 2)
{
if ((end - pos) & 1)
{
*out = unhex(*pos);
++out;
++pos;
}
while (pos < end)
{
*out = unhex2(pos);
pos += word_size;
++out;
}
*out = '\0';
++out;
}
static void inline binStringDecode(const char * pos, const char * end, char *& out)
{
if (pos == end)
{
*out = '\0';
++out;
return;
}
UInt8 left = 0;
/// end - pos is the length of input.
/// (length & 7) to make remain bits length mod 8 is zero to split.
/// e.g. the length is 9 and the input is "101000001",
/// first left_cnt is 1, left is 0, right shift, pos is 1, left = 1
/// then, left_cnt is 0, remain input is '01000001'.
for (UInt8 left_cnt = (end - pos) & 7; left_cnt > 0; --left_cnt)
{
left = left << 1;
if (*pos != '0')
left += 1;
++pos;
}
if (left != 0 || end - pos == 0)
{
*out = left;
++out;
}
assert((end - pos) % 8 == 0);
while (end - pos != 0)
{
UInt8 c = 0;
for (UInt8 i = 0; i < 8; ++i)
{
c = c << 1;
if (*pos != '0')
c += 1;
++pos;
}
*out = c;
++out;
}
*out = '\0';
++out;
}
}

View File

@ -37,7 +37,7 @@ MemoryTracker * getMemoryTracker()
using DB::current_thread;
void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
{
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
if (unlikely(memory_tracker_always_throw_logical_error_on_allocation))
@ -55,8 +55,9 @@ void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
if (will_be > current_thread->untracked_memory_limit)
{
memory_tracker->allocImpl(will_be, throw_if_memory_exceeded);
auto res = memory_tracker->allocImpl(will_be, throw_if_memory_exceeded);
current_thread->untracked_memory = 0;
return res;
}
else
{
@ -68,36 +69,40 @@ void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
/// total_memory_tracker only, ignore untracked_memory
else
{
memory_tracker->allocImpl(size, throw_if_memory_exceeded);
return memory_tracker->allocImpl(size, throw_if_memory_exceeded);
}
return AllocationTrace(memory_tracker->getSampleProbability());
}
return AllocationTrace(0);
}
void CurrentMemoryTracker::check()
{
if (auto * memory_tracker = getMemoryTracker())
memory_tracker->allocImpl(0, true);
std::ignore = memory_tracker->allocImpl(0, true);
}
void CurrentMemoryTracker::alloc(Int64 size)
AllocationTrace CurrentMemoryTracker::alloc(Int64 size)
{
bool throw_if_memory_exceeded = true;
allocImpl(size, throw_if_memory_exceeded);
return allocImpl(size, throw_if_memory_exceeded);
}
void CurrentMemoryTracker::allocNoThrow(Int64 size)
AllocationTrace CurrentMemoryTracker::allocNoThrow(Int64 size)
{
bool throw_if_memory_exceeded = false;
allocImpl(size, throw_if_memory_exceeded);
return allocImpl(size, throw_if_memory_exceeded);
}
void CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size)
AllocationTrace CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size)
{
Int64 addition = new_size - old_size;
addition > 0 ? alloc(addition) : free(-addition);
return addition > 0 ? alloc(addition) : free(-addition);
}
void CurrentMemoryTracker::free(Int64 size)
AllocationTrace CurrentMemoryTracker::free(Int64 size)
{
if (auto * memory_tracker = getMemoryTracker())
{
@ -106,15 +111,20 @@ void CurrentMemoryTracker::free(Int64 size)
current_thread->untracked_memory -= size;
if (current_thread->untracked_memory < -current_thread->untracked_memory_limit)
{
memory_tracker->free(-current_thread->untracked_memory);
Int64 untracked_memory = current_thread->untracked_memory;
current_thread->untracked_memory = 0;
return memory_tracker->free(-untracked_memory);
}
}
/// total_memory_tracker only, ignore untracked_memory
else
{
memory_tracker->free(size);
return memory_tracker->free(size);
}
return AllocationTrace(memory_tracker->getSampleProbability());
}
return AllocationTrace(0);
}

View File

@ -1,19 +1,20 @@
#pragma once
#include <base/types.h>
#include <Common/AllocationTrace.h>
/// Convenience methods, that use current thread's memory_tracker if it is available.
struct CurrentMemoryTracker
{
/// Call the following functions before calling of corresponding operations with memory allocators.
static void alloc(Int64 size);
static void allocNoThrow(Int64 size);
static void realloc(Int64 old_size, Int64 new_size);
[[nodiscard]] static AllocationTrace alloc(Int64 size);
[[nodiscard]] static AllocationTrace allocNoThrow(Int64 size);
[[nodiscard]] static AllocationTrace realloc(Int64 old_size, Int64 new_size);
/// This function should be called after memory deallocation.
static void free(Int64 size);
[[nodiscard]] static AllocationTrace free(Int64 size);
static void check();
private:
static void allocImpl(Int64 size, bool throw_if_memory_exceeded);
[[nodiscard]] static AllocationTrace allocImpl(Int64 size, bool throw_if_memory_exceeded);
};

Some files were not shown because too many files have changed in this diff Show More