Merge branch 'master' into feature-server-iface-metrics

This commit is contained in:
Alexey Milovidov 2023-12-29 22:37:52 +01:00 committed by GitHub
commit aa462fa7cd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
530 changed files with 11652 additions and 4187 deletions

View File

@ -157,7 +157,8 @@ jobs:
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
if: ${{ !failure() && !cancelled() }}
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderDebAarch64
@ -177,7 +178,8 @@ jobs:
run_command: |
python3 build_report_check.py "$CHECK_NAME"
BuilderSpecialReport:
if: ${{ !failure() && !cancelled() }}
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderBinDarwin

View File

@ -262,6 +262,8 @@ jobs:
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderBinRelease
@ -272,7 +274,6 @@ jobs:
- BuilderDebRelease
- BuilderDebTsan
- BuilderDebUBsan
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
@ -285,7 +286,8 @@ jobs:
run_command: |
python3 build_report_check.py "$CHECK_NAME"
BuilderSpecialReport:
if: ${{ !failure() && !cancelled() }}
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderBinAarch64

View File

@ -291,6 +291,8 @@ jobs:
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderBinRelease
@ -301,7 +303,6 @@ jobs:
- BuilderDebRelease
- BuilderDebTsan
- BuilderDebUBsan
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
@ -314,7 +315,8 @@ jobs:
run_command: |
python3 build_report_check.py "$CHECK_NAME"
BuilderSpecialReport:
if: ${{ !failure() && !cancelled() }}
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderBinAarch64

View File

@ -172,6 +172,8 @@ jobs:
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderDebRelease
@ -181,7 +183,6 @@ jobs:
- BuilderDebUBsan
- BuilderDebMsan
- BuilderDebDebug
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
@ -194,7 +195,8 @@ jobs:
run_command: |
python3 build_report_check.py "$CHECK_NAME"
BuilderSpecialReport:
if: ${{ !failure() && !cancelled() }}
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderDebRelease

View File

@ -76,6 +76,8 @@ jobs:
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/build_check.py" "$BUILD_NAME"
- name: Post
# it still be build report to upload for failed build job
if: always()
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.build_name}}'
- name: Mark as done

10
.gitmessage Normal file
View File

@ -0,0 +1,10 @@
## To avoid merge commit in CI run (add a leading space to apply):
#no-merge-commit
## Running specified job (add a leading space to apply):
#job_<JOB NAME>
#job_stateless_tests_release
#job_package_debug
#job_integration_tests_asan

3
.gitmodules vendored
View File

@ -360,6 +360,3 @@
[submodule "contrib/sqids-cpp"]
path = contrib/sqids-cpp
url = https://github.com/sqids/sqids-cpp.git
[submodule "contrib/idna"]
path = contrib/idna
url = https://github.com/ada-url/idna.git

View File

@ -1,4 +1,5 @@
### Table of Contents
**[ClickHouse release v23.12, 2023-12-28](#2312)**<br/>
**[ClickHouse release v23.11, 2023-12-06](#2311)**<br/>
**[ClickHouse release v23.10, 2023-11-02](#2310)**<br/>
**[ClickHouse release v23.9, 2023-09-28](#239)**<br/>
@ -14,6 +15,146 @@
# 2023 Changelog
### <a id="2312"></a> ClickHouse release 23.12, 2023-12-28
#### Backward Incompatible Change
* Fix check for non-deterministic functions in TTL expressions. Previously, you could create a TTL expression with non-deterministic functions in some cases, which could lead to undefined behavior later. This fixes [#37250](https://github.com/ClickHouse/ClickHouse/issues/37250). Disallow TTL expressions that don't depend on any columns of a table by default. It can be allowed back by `SET allow_suspicious_ttl_expressions = 1` or `SET compatibility = '23.11'`. Closes [#37286](https://github.com/ClickHouse/ClickHouse/issues/37286). [#51858](https://github.com/ClickHouse/ClickHouse/pull/51858) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for the `OPTIMIZE` is not allowed by default (it can be unlocked with the `allow_experimental_replacing_merge_with_cleanup` setting). [#58267](https://github.com/ClickHouse/ClickHouse/pull/58267) ([Alexander Tokmakov](https://github.com/tavplubix)). This fixes [#57930](https://github.com/ClickHouse/ClickHouse/issues/57930). This closes [#54988](https://github.com/ClickHouse/ClickHouse/issues/54988). This closes [#54570](https://github.com/ClickHouse/ClickHouse/issues/54570). This closes [#50346](https://github.com/ClickHouse/ClickHouse/issues/50346). This closes [#47579](https://github.com/ClickHouse/ClickHouse/issues/47579). The feature has to be removed because it is not good. We have to remove it as quickly as possible, because there is no other option. [#57932](https://github.com/ClickHouse/ClickHouse/pull/57932) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### New Feature
* Implement Refreshable Materialized Views, requested in [#33919](https://github.com/ClickHouse/ClickHouse/issues/57995). [#56946](https://github.com/ClickHouse/ClickHouse/pull/56946) ([Michael Kolupaev](https://github.com/al13n321), [Michael Guzov](https://github.com/koloshmet)).
* Introduce `PASTE JOIN`, which allows users to join tables without `ON` clause simply by row numbers. Example: `SELECT * FROM (SELECT number AS a FROM numbers(2)) AS t1 PASTE JOIN (SELECT number AS a FROM numbers(2) ORDER BY a DESC) AS t2`. [#57995](https://github.com/ClickHouse/ClickHouse/pull/57995) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* The `ORDER BY` clause now supports specifying `ALL`, meaning that ClickHouse sorts by all columns in the `SELECT` clause. Example: `SELECT col1, col2 FROM tab WHERE [...] ORDER BY ALL`. [#57875](https://github.com/ClickHouse/ClickHouse/pull/57875) ([zhongyuankai](https://github.com/zhongyuankai)).
* Added a new mutation command `ALTER TABLE <table> APPLY DELETED MASK`, which allows to enforce applying of mask written by lightweight delete and to remove rows marked as deleted from disk. [#57433](https://github.com/ClickHouse/ClickHouse/pull/57433) ([Anton Popov](https://github.com/CurtizJ)).
* A handler `/binary` opens a visual viewer of symbols inside the ClickHouse binary. [#58211](https://github.com/ClickHouse/ClickHouse/pull/58211) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Added a new SQL function `sqid` to generate Sqids (https://sqids.org/), example: `SELECT sqid(125, 126)`. [#57512](https://github.com/ClickHouse/ClickHouse/pull/57512) ([Robert Schulze](https://github.com/rschu1ze)).
* Add a new function `seriesPeriodDetectFFT` to detect series period using FFT. [#57574](https://github.com/ClickHouse/ClickHouse/pull/57574) ([Bhavna Jindal](https://github.com/bhavnajindal)).
* Add an HTTP endpoint for checking if Keeper is ready to accept traffic. [#55876](https://github.com/ClickHouse/ClickHouse/pull/55876) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Add 'union' mode for schema inference. In this mode the resulting table schema is the union of all files schemas (so schema is inferred from each file). The mode of schema inference is controlled by a setting `schema_inference_mode` with two possible values - `default` and `union`. Closes [#55428](https://github.com/ClickHouse/ClickHouse/issues/55428). [#55892](https://github.com/ClickHouse/ClickHouse/pull/55892) ([Kruglov Pavel](https://github.com/Avogar)).
* Add new setting `input_format_csv_try_infer_numbers_from_strings` that allows to infer numbers from strings in CSV format. Closes [#56455](https://github.com/ClickHouse/ClickHouse/issues/56455). [#56859](https://github.com/ClickHouse/ClickHouse/pull/56859) ([Kruglov Pavel](https://github.com/Avogar)).
* When the number of databases or tables exceeds a configurable threshold, show a warning to the user. [#57375](https://github.com/ClickHouse/ClickHouse/pull/57375) ([凌涛](https://github.com/lingtaolf)).
* Dictionary with `HASHED_ARRAY` (and `COMPLEX_KEY_HASHED_ARRAY`) layout supports `SHARDS` similarly to `HASHED`. [#57544](https://github.com/ClickHouse/ClickHouse/pull/57544) ([vdimir](https://github.com/vdimir)).
* Add asynchronous metrics for total primary key bytes and total allocated primary key bytes in memory. [#57551](https://github.com/ClickHouse/ClickHouse/pull/57551) ([Bharat Nallan](https://github.com/bharatnc)).
* Add `SHA512_256` function. [#57645](https://github.com/ClickHouse/ClickHouse/pull/57645) ([Bharat Nallan](https://github.com/bharatnc)).
* Add `FORMAT_BYTES` as an alias for `formatReadableSize`. [#57592](https://github.com/ClickHouse/ClickHouse/pull/57592) ([Bharat Nallan](https://github.com/bharatnc)).
* Allow passing optional session token to the `s3` table function. [#57850](https://github.com/ClickHouse/ClickHouse/pull/57850) ([Shani Elharrar](https://github.com/shanielh)).
* Introduce a new setting `http_make_head_request`. If it is turned off, the URL table engine will not do a HEAD request to determine the file size. This is needed to support inefficient, misconfigured, or not capable HTTP servers. [#54602](https://github.com/ClickHouse/ClickHouse/pull/54602) ([Fionera](https://github.com/fionera)).
* It is now possible to refer to ALIAS column in index (non-primary-key) definitions (issue [#55650](https://github.com/ClickHouse/ClickHouse/issues/55650)). Example: `CREATE TABLE tab(col UInt32, col_alias ALIAS col + 1, INDEX idx (col_alias) TYPE minmax) ENGINE = MergeTree ORDER BY col;`. [#57546](https://github.com/ClickHouse/ClickHouse/pull/57546) ([Robert Schulze](https://github.com/rschu1ze)).
* Added a new setting `readonly` which can be used to specify an S3 disk is read only. It can be useful to create a table on a disk of `s3_plain` type, while having read only access to the underlying S3 bucket. [#57977](https://github.com/ClickHouse/ClickHouse/pull/57977) ([Pengyuan Bian](https://github.com/bianpengyuan)).
* The primary key analysis in MergeTree tables will now be applied to predicates that include the virtual column `_part_offset` (optionally with `_part`). This feature can serve as a special kind of a secondary index. [#58224](https://github.com/ClickHouse/ClickHouse/pull/58224) ([Amos Bird](https://github.com/amosbird)).
#### Performance Improvement
* Extract non-intersecting parts ranges from MergeTree table during FINAL processing. That way we can avoid additional FINAL logic for this non-intersecting parts ranges. In case when amount of duplicate values with same primary key is low, performance will be almost the same as without FINAL. Improve reading performance for MergeTree FINAL when `do_not_merge_across_partitions_select_final` setting is set. [#58120](https://github.com/ClickHouse/ClickHouse/pull/58120) ([Maksim Kita](https://github.com/kitaisreal)).
* Made copy between s3 disks using a s3-server-side copy instead of copying through the buffer. Improves `BACKUP/RESTORE` operations and `clickhouse-disks copy` command. [#56744](https://github.com/ClickHouse/ClickHouse/pull/56744) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Hash JOIN respects setting `max_joined_block_size_rows` and do not produce large blocks for `ALL JOIN`. [#56996](https://github.com/ClickHouse/ClickHouse/pull/56996) ([vdimir](https://github.com/vdimir)).
* Release memory for aggregation earlier. This may avoid unnecessary external aggregation. [#57691](https://github.com/ClickHouse/ClickHouse/pull/57691) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Improve performance of string serialization. [#57717](https://github.com/ClickHouse/ClickHouse/pull/57717) ([Maksim Kita](https://github.com/kitaisreal)).
* Support trivial count optimization for `Merge`-engine tables. [#57867](https://github.com/ClickHouse/ClickHouse/pull/57867) ([skyoct](https://github.com/skyoct)).
* Optimized aggregation in some cases. [#57872](https://github.com/ClickHouse/ClickHouse/pull/57872) ([Anton Popov](https://github.com/CurtizJ)).
* The `hasAny` function can now take advantage of the full-text skipping indices. [#57878](https://github.com/ClickHouse/ClickHouse/pull/57878) ([Jpnock](https://github.com/Jpnock)).
* Function `if(cond, then, else)` (and its alias `cond ? then : else`) were optimized to use branch-free evaluation. [#57885](https://github.com/ClickHouse/ClickHouse/pull/57885) ([zhanglistar](https://github.com/zhanglistar)).
* MergeTree automatically derive `do_not_merge_across_partitions_select_final` setting if partition key expression contains only columns from primary key expression. [#58218](https://github.com/ClickHouse/ClickHouse/pull/58218) ([Maksim Kita](https://github.com/kitaisreal)).
* Speedup `MIN` and `MAX` for native types. [#58231](https://github.com/ClickHouse/ClickHouse/pull/58231) ([Raúl Marín](https://github.com/Algunenano)).
* Implement `SLRU` cache policy for filesystem cache. [#57076](https://github.com/ClickHouse/ClickHouse/pull/57076) ([Kseniia Sumarokova](https://github.com/kssenii)).
* The limit for the number of connections per endpoint for background fetches was raised from `15` to the value of `background_fetches_pool_size` setting. - MergeTree-level setting `replicated_max_parallel_fetches_for_host` became obsolete - MergeTree-level settings `replicated_fetches_http_connection_timeout`, `replicated_fetches_http_send_timeout` and `replicated_fetches_http_receive_timeout` are moved to the Server-level. - Setting `keep_alive_timeout` is added to the list of Server-level settings. [#57523](https://github.com/ClickHouse/ClickHouse/pull/57523) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Make querying `system.filesystem_cache` not memory intensive. [#57687](https://github.com/ClickHouse/ClickHouse/pull/57687) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Reduce memory usage on strings deserialization. [#57787](https://github.com/ClickHouse/ClickHouse/pull/57787) ([Maksim Kita](https://github.com/kitaisreal)).
* More efficient constructor for Enum - it makes sense when Enum has a boatload of values. [#57887](https://github.com/ClickHouse/ClickHouse/pull/57887) ([Duc Canh Le](https://github.com/canhld94)).
* An improvement for reading from the filesystem cache: always use `pread` method. [#57970](https://github.com/ClickHouse/ClickHouse/pull/57970) ([Nikita Taranov](https://github.com/nickitat)).
* Add optimization for AND notEquals chain in logical expression optimizer. This optimization is only available with the experimental Analyzer enabled. [#58214](https://github.com/ClickHouse/ClickHouse/pull/58214) ([Kevin Mingtarja](https://github.com/kevinmingtarja)).
#### Improvement
* Support for soft memory limit in Keeper. It will refuse requests if the memory usage is close to the maximum. [#57271](https://github.com/ClickHouse/ClickHouse/pull/57271) ([Han Fei](https://github.com/hanfei1991)). [#57699](https://github.com/ClickHouse/ClickHouse/pull/57699) ([Han Fei](https://github.com/hanfei1991)).
* Make inserts into distributed tables handle updated cluster configuration properly. When the list of cluster nodes is dynamically updated, the Directory Monitor of the distribution table will update it. [#42826](https://github.com/ClickHouse/ClickHouse/pull/42826) ([zhongyuankai](https://github.com/zhongyuankai)).
* Do not allow creating a replicated table with inconsistent merge parameters. [#56833](https://github.com/ClickHouse/ClickHouse/pull/56833) ([Duc Canh Le](https://github.com/canhld94)).
* Show uncompressed size in `system.tables`. [#56618](https://github.com/ClickHouse/ClickHouse/issues/56618). [#57186](https://github.com/ClickHouse/ClickHouse/pull/57186) ([Chen Lixiang](https://github.com/chenlx0)).
* Add `skip_unavailable_shards` as a setting for `Distributed` tables that is similar to the corresponding query-level setting. Closes [#43666](https://github.com/ClickHouse/ClickHouse/issues/43666). [#57218](https://github.com/ClickHouse/ClickHouse/pull/57218) ([Gagan Goel](https://github.com/tntnatbry)).
* The function `substring` (aliases: `substr`, `mid`) can now be used with `Enum` types. Previously, the first function argument had to be a value of type `String` or `FixedString`. This improves compatibility with 3rd party tools such as Tableau via MySQL interface. [#57277](https://github.com/ClickHouse/ClickHouse/pull/57277) ([Serge Klochkov](https://github.com/slvrtrn)).
* Function `format` now supports arbitrary argument types (instead of only `String` and `FixedString` arguments). This is important to calculate `SELECT format('The {0} to all questions is {1}', 'answer', 42)`. [#57549](https://github.com/ClickHouse/ClickHouse/pull/57549) ([Robert Schulze](https://github.com/rschu1ze)).
* Allows to use the `date_trunc` function with a case-insensitive first argument. Both cases are now supported: `SELECT date_trunc('day', now())` and `SELECT date_trunc('DAY', now())`. [#57624](https://github.com/ClickHouse/ClickHouse/pull/57624) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Better hints when a table doesn't exist. [#57342](https://github.com/ClickHouse/ClickHouse/pull/57342) ([Bharat Nallan](https://github.com/bharatnc)).
* Allow to overwrite `max_partition_size_to_drop` and `max_table_size_to_drop` server settings in query time. [#57452](https://github.com/ClickHouse/ClickHouse/pull/57452) ([Jordi Villar](https://github.com/jrdi)).
* Slightly better inference of unnamed tupes in JSON formats. [#57751](https://github.com/ClickHouse/ClickHouse/pull/57751) ([Kruglov Pavel](https://github.com/Avogar)).
* Add support for read-only flag when connecting to Keeper (fixes [#53749](https://github.com/ClickHouse/ClickHouse/issues/53749)). [#57479](https://github.com/ClickHouse/ClickHouse/pull/57479) ([Mikhail Koviazin](https://github.com/mkmkme)).
* Fix possible distributed sends stuck due to "No such file or directory" (during recovering a batch from disk). Fix possible issues with `error_count` from `system.distribution_queue` (in case of `distributed_directory_monitor_max_sleep_time_ms` >5min). Introduce profile event to track async INSERT failures - `DistributedAsyncInsertionFailures`. [#57480](https://github.com/ClickHouse/ClickHouse/pull/57480) ([Azat Khuzhin](https://github.com/azat)).
* Support PostgreSQL generated columns and default column values in `MaterializedPostgreSQL` (experimental feature). Closes [#40449](https://github.com/ClickHouse/ClickHouse/issues/40449). [#57568](https://github.com/ClickHouse/ClickHouse/pull/57568) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Allow to apply some filesystem cache config settings changes without server restart. [#57578](https://github.com/ClickHouse/ClickHouse/pull/57578) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Properly handling PostgreSQL table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot](https://github.com/myrrc)).
* Expose the total number of errors occurred since last server restart as a `ClickHouseErrorMetric_ALL` metric. [#57627](https://github.com/ClickHouse/ClickHouse/pull/57627) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Allow nodes in the configuration file with `from_env`/`from_zk` reference and non empty element with replace=1. [#57628](https://github.com/ClickHouse/ClickHouse/pull/57628) ([Azat Khuzhin](https://github.com/azat)).
* A table function `fuzzJSON` which allows generating a lot of malformed JSON for fuzzing. [#57646](https://github.com/ClickHouse/ClickHouse/pull/57646) ([Julia Kartseva](https://github.com/jkartseva)).
* Allow IPv6 to UInt128 conversion and binary arithmetic. [#57707](https://github.com/ClickHouse/ClickHouse/pull/57707) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Add a setting for `async inserts deduplication cache` - how long we wait for cache update. Deprecate setting `async_block_ids_cache_min_update_interval_ms`. Now cache is updated only in case of conflicts. [#57743](https://github.com/ClickHouse/ClickHouse/pull/57743) ([alesapin](https://github.com/alesapin)).
* `sleep()` function now can be cancelled with `KILL QUERY`. [#57746](https://github.com/ClickHouse/ClickHouse/pull/57746) ([Vitaly Baranov](https://github.com/vitlibar)).
* Forbid `CREATE TABLE ... AS SELECT` queries for `Replicated` table engines in the experimental `Replicated` database because they are not supported. Reference [#35408](https://github.com/ClickHouse/ClickHouse/issues/35408). [#57796](https://github.com/ClickHouse/ClickHouse/pull/57796) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix and improve transforming queries for external databases, to recursively obtain all compatible predicates. [#57888](https://github.com/ClickHouse/ClickHouse/pull/57888) ([flynn](https://github.com/ucasfl)).
* Support dynamic reloading of the filesystem cache size. Closes [#57866](https://github.com/ClickHouse/ClickHouse/issues/57866). [#57897](https://github.com/ClickHouse/ClickHouse/pull/57897) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Correctly support `system.stack_trace` for threads with blocked SIGRTMIN (these threads can exist in low-quality external libraries such as Apache rdkafka). [#57907](https://github.com/ClickHouse/ClickHouse/pull/57907) ([Azat Khuzhin](https://github.com/azat)). Aand also send signal to the threads only if it is not blocked to avoid waiting `storage_system_stack_trace_pipe_read_timeout_ms` when it does not make any sense. [#58136](https://github.com/ClickHouse/ClickHouse/pull/58136) ([Azat Khuzhin](https://github.com/azat)).
* Tolerate keeper failures in the quorum inserts' check. [#57986](https://github.com/ClickHouse/ClickHouse/pull/57986) ([Raúl Marín](https://github.com/Algunenano)).
* Add max/peak RSS (`MemoryResidentMax`) into system.asynchronous_metrics. [#58095](https://github.com/ClickHouse/ClickHouse/pull/58095) ([Azat Khuzhin](https://github.com/azat)).
* This PR allows users to use s3-style links (`https://` and `s3://`) without mentioning region if it's not default. Also find the correct region if the user mentioned the wrong one. [#58148](https://github.com/ClickHouse/ClickHouse/pull/58148) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* `clickhouse-format --obfuscate` will know about Settings, MergeTreeSettings, and time zones and keep their names unchanged. [#58179](https://github.com/ClickHouse/ClickHouse/pull/58179) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Added explicit `finalize()` function in `ZipArchiveWriter`. Simplify too complicated code in `ZipArchiveWriter`. This fixes [#58074](https://github.com/ClickHouse/ClickHouse/issues/58074). [#58202](https://github.com/ClickHouse/ClickHouse/pull/58202) ([Vitaly Baranov](https://github.com/vitlibar)).
* Make caches with the same path use the same cache objects. This behaviour existed before, but was broken in 23.4. If such caches with the same path have different set of cache settings, an exception will be thrown, that this is not allowed. [#58264](https://github.com/ClickHouse/ClickHouse/pull/58264) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Parallel replicas (experimental feature): friendly settings [#57542](https://github.com/ClickHouse/ClickHouse/pull/57542) ([Igor Nikonov](https://github.com/devcrafter)).
* Parallel replicas (experimental feature): announcement response handling improvement [#57749](https://github.com/ClickHouse/ClickHouse/pull/57749) ([Igor Nikonov](https://github.com/devcrafter)).
* Parallel replicas (experimental feature): give more respect to `min_number_of_marks` in `ParallelReplicasReadingCoordinator` [#57763](https://github.com/ClickHouse/ClickHouse/pull/57763) ([Nikita Taranov](https://github.com/nickitat)).
* Parallel replicas (experimental feature): disable parallel replicas with IN (subquery) [#58133](https://github.com/ClickHouse/ClickHouse/pull/58133) ([Igor Nikonov](https://github.com/devcrafter)).
* Parallel replicas (experimental feature): add profile event 'ParallelReplicasUsedCount' [#58173](https://github.com/ClickHouse/ClickHouse/pull/58173) ([Igor Nikonov](https://github.com/devcrafter)).
* Non POST requests such as HEAD will be readonly similar to GET. [#58060](https://github.com/ClickHouse/ClickHouse/pull/58060) ([San](https://github.com/santrancisco)).
* Add `bytes_uncompressed` column to `system.part_log` [#58167](https://github.com/ClickHouse/ClickHouse/pull/58167) ([Jordi Villar](https://github.com/jrdi)).
* Add base backup name to `system.backups` and `system.backup_log` tables [#58178](https://github.com/ClickHouse/ClickHouse/pull/58178) ([Pradeep Chhetri](https://github.com/chhetripradeep)).
* Add support for specifying query parameters in the command line in clickhouse-local [#58210](https://github.com/ClickHouse/ClickHouse/pull/58210) ([Pradeep Chhetri](https://github.com/chhetripradeep)).
#### Build/Testing/Packaging Improvement
* Randomize more settings [#39663](https://github.com/ClickHouse/ClickHouse/pull/39663) ([Anton Popov](https://github.com/CurtizJ)).
* Randomize disabled optimizations in CI [#57315](https://github.com/ClickHouse/ClickHouse/pull/57315) ([Raúl Marín](https://github.com/Algunenano)).
* Allow usage of Azure-related table engines/functions on macOS. [#51866](https://github.com/ClickHouse/ClickHouse/pull/51866) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* ClickHouse Fast Test now uses Musl instead of GLibc. [#57711](https://github.com/ClickHouse/ClickHouse/pull/57711) ([Alexey Milovidov](https://github.com/alexey-milovidov)). The fully-static Musl build is available to download from the CI.
* Run ClickBench for every commit. This closes [#57708](https://github.com/ClickHouse/ClickHouse/issues/57708). [#57712](https://github.com/ClickHouse/ClickHouse/pull/57712) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove the usage of a harmful C/POSIX `select` function from external libraries. [#57467](https://github.com/ClickHouse/ClickHouse/pull/57467) ([Igor Nikonov](https://github.com/devcrafter)).
* Settings only available in ClickHouse Cloud will be also present in the open-source ClickHouse build for convenience. [#57638](https://github.com/ClickHouse/ClickHouse/pull/57638) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fixed a possibility of sorting order breakage in TTL GROUP BY [#49103](https://github.com/ClickHouse/ClickHouse/pull/49103) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix: split `lttb` bucket strategy, first bucket and last bucket should only contain single point [#57003](https://github.com/ClickHouse/ClickHouse/pull/57003) ([FFish](https://github.com/wxybear)).
* Fix possible deadlock in the `Template` format during sync after error [#57004](https://github.com/ClickHouse/ClickHouse/pull/57004) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix early stop while parsing a file with skipping lots of errors [#57006](https://github.com/ClickHouse/ClickHouse/pull/57006) ([Kruglov Pavel](https://github.com/Avogar)).
* Prevent dictionary's ACL bypass via the `dictionary` table function [#57362](https://github.com/ClickHouse/ClickHouse/pull/57362) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Fix another case of a "non-ready set" error found by Fuzzer. [#57423](https://github.com/ClickHouse/ClickHouse/pull/57423) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix several issues regarding PostgreSQL `array_ndims` usage. [#57436](https://github.com/ClickHouse/ClickHouse/pull/57436) ([Ryan Jacobs](https://github.com/ryanmjacobs)).
* Fix RWLock inconsistency after write lock timeout [#57454](https://github.com/ClickHouse/ClickHouse/pull/57454) ([Vitaly Baranov](https://github.com/vitlibar)). Fix RWLock inconsistency after write lock timeout (again) [#57733](https://github.com/ClickHouse/ClickHouse/pull/57733) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix: don't exclude ephemeral column when building pushing to view chain [#57461](https://github.com/ClickHouse/ClickHouse/pull/57461) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* MaterializedPostgreSQL (experimental issue): fix issue [#41922](https://github.com/ClickHouse/ClickHouse/issues/41922), add test for [#41923](https://github.com/ClickHouse/ClickHouse/issues/41923) [#57515](https://github.com/ClickHouse/ClickHouse/pull/57515) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities. [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Fix crash in clickhouse-local [#57553](https://github.com/ClickHouse/ClickHouse/pull/57553) ([Nikolay Degterinsky](https://github.com/evillique)).
* A fix for Hash JOIN. [#57564](https://github.com/ClickHouse/ClickHouse/pull/57564) ([vdimir](https://github.com/vdimir)).
* Fix possible error in PostgreSQL source [#57567](https://github.com/ClickHouse/ClickHouse/pull/57567) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix type correction in Hash JOIN for nested LowCardinality. [#57614](https://github.com/ClickHouse/ClickHouse/pull/57614) ([vdimir](https://github.com/vdimir)).
* Avoid hangs of `system.stack_trace` by correctly prohibiting parallel reading from it. [#57641](https://github.com/ClickHouse/ClickHouse/pull/57641) ([Azat Khuzhin](https://github.com/azat)).
* Fix an error for aggregation of sparse columns with `any(...) RESPECT NULL` [#57710](https://github.com/ClickHouse/ClickHouse/pull/57710) ([Azat Khuzhin](https://github.com/azat)).
* Fix unary operators parsing [#57713](https://github.com/ClickHouse/ClickHouse/pull/57713) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix dependency loading for the experimental table engine `MaterializedPostgreSQL`. [#57754](https://github.com/ClickHouse/ClickHouse/pull/57754) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix retries for disconnected nodes for BACKUP/RESTORE ON CLUSTER [#57764](https://github.com/ClickHouse/ClickHouse/pull/57764) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix result of external aggregation in case of partially materialized projection [#57790](https://github.com/ClickHouse/ClickHouse/pull/57790) ([Anton Popov](https://github.com/CurtizJ)).
* Fix merge in aggregation functions with `*Map` combinator [#57795](https://github.com/ClickHouse/ClickHouse/pull/57795) ([Anton Popov](https://github.com/CurtizJ)).
* Disable `system.kafka_consumers` because it has a bug. [#57822](https://github.com/ClickHouse/ClickHouse/pull/57822) ([Azat Khuzhin](https://github.com/azat)).
* Fix LowCardinality keys support in Merge JOIN. [#57827](https://github.com/ClickHouse/ClickHouse/pull/57827) ([vdimir](https://github.com/vdimir)).
* A fix for `InterpreterCreateQuery` related to the sample block. [#57855](https://github.com/ClickHouse/ClickHouse/pull/57855) ([Maksim Kita](https://github.com/kitaisreal)).
* `addresses_expr` were ignored for named collections from PostgreSQL. [#57874](https://github.com/ClickHouse/ClickHouse/pull/57874) ([joelynch](https://github.com/joelynch)).
* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)). Then it was rewritten from Rust to C++ for better [memory-safety](https://www.memorysafety.org/). [#57994](https://github.com/ClickHouse/ClickHouse/pull/57994) ([Raúl Marín](https://github.com/Algunenano)).
* Normalize function names in `CREATE INDEX` [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix handling of unavailable replicas before first request happened [#57933](https://github.com/ClickHouse/ClickHouse/pull/57933) ([Nikita Taranov](https://github.com/nickitat)).
* Fix literal alias misclassification [#57988](https://github.com/ClickHouse/ClickHouse/pull/57988) ([Chen768959](https://github.com/Chen768959)).
* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix integer overflow in the `Poco` library, related to `UTF32Encoding` [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)).
* Fix parallel replicas (experimental feature) in presence of a scalar subquery with a big integer value [#58118](https://github.com/ClickHouse/ClickHouse/pull/58118) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `accurateCastOrNull` for out-of-range `DateTime` [#58139](https://github.com/ClickHouse/ClickHouse/pull/58139) ([Andrey Zvonov](https://github.com/zvonand)).
* Fix possible `PARAMETER_OUT_OF_BOUND` error during subcolumns reading from a wide part in MergeTree [#58175](https://github.com/ClickHouse/ClickHouse/pull/58175) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix a slow-down of CREATE VIEW with an enormous number of subqueries [#58220](https://github.com/ClickHouse/ClickHouse/pull/58220) ([Tao Wang](https://github.com/wangtZJU)).
* Fix parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#58250](https://github.com/ClickHouse/ClickHouse/pull/58250) ([Kruglov Pavel](https://github.com/Avogar)).
### <a id="2311"></a> ClickHouse release 23.11, 2023-12-06
#### Backward Incompatible Change

View File

@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s
| Version | Supported |
|:-|:-|
| 23.12 | ✔️ |
| 23.11 | ✔️ |
| 23.10 | ✔️ |
| 23.9 | ✔️ |
| 23.9 | |
| 23.8 | ✔️ |
| 23.7 | ❌ |
| 23.6 | ❌ |

View File

@ -69,6 +69,9 @@
// init() is called in the MyIOS constructor.
// Therefore we replace each call to init() with
// the poco_ios_init macro defined below.
//
// Also this macro will adjust exceptions() flags, since by default std::ios
// will hide exceptions, while in ClickHouse it is better to pass them through.
#if !defined(POCO_IOS_INIT_HACK)
@ -79,7 +82,10 @@
#if defined(POCO_IOS_INIT_HACK)
# define poco_ios_init(buf)
#else
# define poco_ios_init(buf) init(buf)
# define poco_ios_init(buf) do { \
init(buf); \
this->exceptions(std::ios::failbit | std::ios::badbit); \
} while (0)
#endif

View File

@ -70,6 +70,15 @@ public:
int queryConvert(const unsigned char * bytes, int length) const;
int sequenceLength(const unsigned char * bytes, int length) const;
protected:
static int safeToInt(Poco::UInt32 value)
{
if (value <= 0x10FFFF)
return static_cast<int>(value);
else
return -1;
}
private:
bool _flipBytes;
static const char * _names[];

View File

@ -30,22 +30,22 @@ const char* UTF32Encoding::_names[] =
const TextEncoding::CharacterMap UTF32Encoding::_charMap =
{
/* 00 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 10 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 20 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 30 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 40 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 50 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 60 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 70 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 80 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 90 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* a0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* b0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* c0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* d0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* e0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* f0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
/* 00 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 10 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 20 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 30 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 40 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 50 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 60 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 70 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 80 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* 90 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* a0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* b0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* c0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* d0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* e0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
/* f0 */ -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
};
@ -118,7 +118,7 @@ const TextEncoding::CharacterMap& UTF32Encoding::characterMap() const
int UTF32Encoding::convert(const unsigned char* bytes) const
{
UInt32 uc;
unsigned char* p = (unsigned char*) &uc;
unsigned char* p = reinterpret_cast<unsigned char*>(&uc);
*p++ = *bytes++;
*p++ = *bytes++;
*p++ = *bytes++;
@ -129,7 +129,7 @@ int UTF32Encoding::convert(const unsigned char* bytes) const
ByteOrder::flipBytes(uc);
}
return uc;
return safeToInt(uc);
}
@ -138,7 +138,7 @@ int UTF32Encoding::convert(int ch, unsigned char* bytes, int length) const
if (bytes && length >= 4)
{
UInt32 ch1 = _flipBytes ? ByteOrder::flipBytes((UInt32) ch) : (UInt32) ch;
unsigned char* p = (unsigned char*) &ch1;
unsigned char* p = reinterpret_cast<unsigned char*>(&ch1);
*bytes++ = *p++;
*bytes++ = *p++;
*bytes++ = *p++;
@ -155,14 +155,14 @@ int UTF32Encoding::queryConvert(const unsigned char* bytes, int length) const
if (length >= 4)
{
UInt32 uc;
unsigned char* p = (unsigned char*) &uc;
unsigned char* p = reinterpret_cast<unsigned char*>(&uc);
*p++ = *bytes++;
*p++ = *bytes++;
*p++ = *bytes++;
*p++ = *bytes++;
if (_flipBytes)
ByteOrder::flipBytes(uc);
return uc;
ret = safeToInt(uc);
}
return ret;

View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54481)
SET(VERSION_REVISION 54482)
SET(VERSION_MAJOR 23)
SET(VERSION_MINOR 12)
SET(VERSION_MINOR 13)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 05bc8ef1e02b9c7332f08091775b255d191341bf)
SET(VERSION_DESCRIBE v23.12.1.1-testing)
SET(VERSION_STRING 23.12.1.1)
SET(VERSION_GITHASH a2faa65b080a587026c86844f3a20c74d23a86f8)
SET(VERSION_DESCRIBE v23.13.1.1-testing)
SET(VERSION_STRING 23.13.1.1)
# end of autochange

View File

@ -12,6 +12,8 @@ elseif (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
set (OS_DARWIN 1)
add_definitions(-D OS_DARWIN)
# For MAP_ANON/MAP_ANONYMOUS
add_definitions(-D _DARWIN_C_SOURCE)
elseif (CMAKE_SYSTEM_NAME MATCHES "SunOS")
set (OS_SUNOS 1)
add_definitions(-D OS_SUNOS)

View File

@ -154,7 +154,6 @@ add_contrib (libpqxx-cmake libpqxx)
add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (idna-cmake idna)
add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (incbin-cmake incbin)
add_contrib (sqids-cpp-cmake sqids-cpp)

2
contrib/azure vendored

@ -1 +1 @@
Subproject commit a852d81f92f153e109de165ee08546741e3f2a68
Subproject commit 060c54dfb0abe869c065143303a9d3e9c54c29e3

View File

@ -8,31 +8,21 @@ endif()
set(AZURE_DIR "${ClickHouse_SOURCE_DIR}/contrib/azure")
set(AZURE_SDK_LIBRARY_DIR "${AZURE_DIR}/sdk")
file(GLOB AZURE_SDK_CORE_SRC
file(GLOB AZURE_SDK_SRC
"${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/*.cpp"
"${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/cryptography/*.cpp"
"${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/*.cpp"
"${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.cpp"
"${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/io/*.cpp"
)
file(GLOB AZURE_SDK_IDENTITY_SRC
"${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/tracing/*.cpp"
"${AZURE_SDK_LIBRARY_DIR}/identity/azure-identity/src/*.cpp"
)
file(GLOB AZURE_SDK_STORAGE_COMMON_SRC
"${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/*.cpp"
"${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/private/*.cpp"
"${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-common/src/*.cpp"
)
file(GLOB AZURE_SDK_STORAGE_BLOBS_SRC
"${AZURE_SDK_LIBRARY_DIR}/storage/azure-storage-blobs/src/*.cpp"
)
file(GLOB AZURE_SDK_UNIFIED_SRC
${AZURE_SDK_CORE_SRC}
${AZURE_SDK_IDENTITY_SRC}
${AZURE_SDK_STORAGE_COMMON_SRC}
${AZURE_SDK_STORAGE_BLOBS_SRC}
${AZURE_SDK_SRC}
)
set(AZURE_SDK_INCLUDES

1
contrib/idna vendored

@ -1 +0,0 @@
Subproject commit 3c8be01d42b75649f1ac9b697d0ef757eebfe667

View File

@ -1,24 +0,0 @@
option(ENABLE_IDNA "Enable idna support" ${ENABLE_LIBRARIES})
if ((NOT ENABLE_IDNA))
message (STATUS "Not using idna")
return()
endif()
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/idna")
set (SRCS
"${LIBRARY_DIR}/src/idna.cpp"
"${LIBRARY_DIR}/src/mapping.cpp"
"${LIBRARY_DIR}/src/mapping_tables.cpp"
"${LIBRARY_DIR}/src/normalization.cpp"
"${LIBRARY_DIR}/src/normalization_tables.cpp"
"${LIBRARY_DIR}/src/punycode.cpp"
"${LIBRARY_DIR}/src/to_ascii.cpp"
"${LIBRARY_DIR}/src/to_unicode.cpp"
"${LIBRARY_DIR}/src/unicode_transcoding.cpp"
"${LIBRARY_DIR}/src/validity.cpp"
)
add_library (_idna ${SRCS})
target_include_directories(_idna PUBLIC "${LIBRARY_DIR}/include")
add_library (ch_contrib::idna ALIAS _idna)

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.11.2.11"
ARG VERSION="23.12.1.1368"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.11.2.11"
ARG VERSION="23.12.1.1368"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.11.2.11"
ARG VERSION="23.12.1.1368"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -34,7 +34,7 @@ services:
# Empty container to run proxy resolver.
resolver:
image: clickhouse/python-bottle
image: clickhouse/python-bottle:${DOCKER_PYTHON_BOTTLE_TAG:-latest}
expose:
- "8080"
tty: true

View File

@ -40,6 +40,12 @@ if [ "$cache_policy" = "SLRU" ]; then
mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
fi
if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
# It is not needed, we will explicitly create tables on s3.
# We do not have statefull tests with s3 storage run in public repository, but this is needed for another repository.
rm /etc/clickhouse-server/config.d/s3_storage_policy_for_merge_tree_by_default.xml
fi
function start()
{
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
@ -123,8 +129,76 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
else
clickhouse-client --query "CREATE DATABASE test"
clickhouse-client --query "SHOW TABLES FROM test"
clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16,
EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32,
UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String,
RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16),
URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8,
FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16,
UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8,
MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16,
SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16,
ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32,
SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8,
FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8,
HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8,
GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32,
HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String,
HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32,
FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32,
LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32,
RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String,
ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String,
OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String,
UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64,
URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String,
ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64),
IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8,
VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32,
Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String,
EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String,
AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32),
RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32,
SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32,
ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32,
SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16,
UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16,
FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8,
FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8,
Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8,
BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16),
Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32),
WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64,
ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32,
ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32,
ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32,
ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16,
ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32,
OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String,
UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime,
PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8,
PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16),
CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64,
StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64,
OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64,
UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32,
ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64),
Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32,
DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16))
ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC"
clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC"
else
clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
fi
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0"
fi
@ -144,6 +218,10 @@ function run_tests()
ADDITIONAL_OPTIONS+=('--replicated-database')
fi
if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--s3-storage')
fi
if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--db-engine=Ordinary')
fi

View File

@ -58,6 +58,7 @@ if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; th
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
fi
# For flaky check we also enable thread fuzzer

View File

@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.11.3.23-stable (a14ab450b0e) FIXME as compared to v23.11.2.11-stable (6e5411358c8)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)).
* Normalize function names in CREATE INDEX [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix handling of unavailable replicas before first request happened [#57933](https://github.com/ClickHouse/ClickHouse/pull/57933) ([Nikita Taranov](https://github.com/nickitat)).
* Revert "Fix bug window functions: revert [#39631](https://github.com/ClickHouse/ClickHouse/issues/39631)" [#58031](https://github.com/ClickHouse/ClickHouse/pull/58031) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### NO CL CATEGORY
* Backported in [#57918](https://github.com/ClickHouse/ClickHouse/issues/57918):. [#57909](https://github.com/ClickHouse/ClickHouse/pull/57909) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Remove heavy rust stable toolchain [#57905](https://github.com/ClickHouse/ClickHouse/pull/57905) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix docker image for integration tests (fixes CI) [#57952](https://github.com/ClickHouse/ClickHouse/pull/57952) ([Azat Khuzhin](https://github.com/azat)).
* Always use `pread` for reading cache segments [#57970](https://github.com/ClickHouse/ClickHouse/pull/57970) ([Nikita Taranov](https://github.com/nickitat)).

View File

@ -0,0 +1,327 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.12.1.1368-stable (a2faa65b080) FIXME as compared to v23.11.1.2711-stable (05bc8ef1e02)
#### Backward Incompatible Change
* Fix check for non-deterministic functions in TTL expressions. Previously, you could create a TTL expression with non-deterministic functions in some cases, which could lead to undefined behavior later. This fixes [#37250](https://github.com/ClickHouse/ClickHouse/issues/37250). Disallow TTL expressions that don't depend on any columns of a table by default. It can be allowed back by `SET allow_suspicious_ttl_expressions = 1` or `SET compatibility = '23.11'`. Closes [#37286](https://github.com/ClickHouse/ClickHouse/issues/37286). [#51858](https://github.com/ClickHouse/ClickHouse/pull/51858) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove function `arrayFold` because it has a bug. This closes [#57816](https://github.com/ClickHouse/ClickHouse/issues/57816). This closes [#57458](https://github.com/ClickHouse/ClickHouse/issues/57458). [#57836](https://github.com/ClickHouse/ClickHouse/pull/57836) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove the feature of `is_deleted` row in ReplacingMergeTree and the `CLEANUP` modifier for the OPTIMIZE query. This fixes [#57930](https://github.com/ClickHouse/ClickHouse/issues/57930). This closes [#54988](https://github.com/ClickHouse/ClickHouse/issues/54988). This closes [#54570](https://github.com/ClickHouse/ClickHouse/issues/54570). This closes [#50346](https://github.com/ClickHouse/ClickHouse/issues/50346). This closes [#47579](https://github.com/ClickHouse/ClickHouse/issues/47579). The feature has to be removed because it is not good. We have to remove it as quickly as possible, because there is no other option. [#57932](https://github.com/ClickHouse/ClickHouse/pull/57932) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for `OPTIMIZE` is not allowed by default (unless `allow_experimental_replacing_merge_with_cleanup` is enabled). [#58267](https://github.com/ClickHouse/ClickHouse/pull/58267) ([Alexander Tokmakov](https://github.com/tavplubix)).
#### New Feature
* Allow disabling of HEAD request before GET request. [#54602](https://github.com/ClickHouse/ClickHouse/pull/54602) ([Fionera](https://github.com/fionera)).
* Add a HTTP endpoint for checking if Keeper is ready to accept traffic. [#55876](https://github.com/ClickHouse/ClickHouse/pull/55876) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Add 'union' mode for schema inference. In this mode the resulting table schema is the union of all files schemas (so schema is inferred from each file). The mode of schema inference is controlled by a setting `schema_inference_mode` with 2 possible values - `default` and `union`. Closes [#55428](https://github.com/ClickHouse/ClickHouse/issues/55428). [#55892](https://github.com/ClickHouse/ClickHouse/pull/55892) ([Kruglov Pavel](https://github.com/Avogar)).
* Add new setting `input_format_csv_try_infer_numbers_from_strings` that allows to infer numbers from strings in CSV format. Closes [#56455](https://github.com/ClickHouse/ClickHouse/issues/56455). [#56859](https://github.com/ClickHouse/ClickHouse/pull/56859) ([Kruglov Pavel](https://github.com/Avogar)).
* Refreshable materialized views. [#56946](https://github.com/ClickHouse/ClickHouse/pull/56946) ([Michael Kolupaev](https://github.com/al13n321)).
* Add more warnings on the number of databases, tables. [#57375](https://github.com/ClickHouse/ClickHouse/pull/57375) ([凌涛](https://github.com/lingtaolf)).
* Added a new mutation command `ALTER TABLE <table> APPLY DELETED MASK`, which allows to enforce applying of mask written by lightweight delete and to remove rows marked as deleted from disk. [#57433](https://github.com/ClickHouse/ClickHouse/pull/57433) ([Anton Popov](https://github.com/CurtizJ)).
* Added a new SQL function `sqid` to generate Sqids (https://sqids.org/), example: `SELECT sqid(125, 126)`. [#57512](https://github.com/ClickHouse/ClickHouse/pull/57512) ([Robert Schulze](https://github.com/rschu1ze)).
* Dictionary with `HASHED_ARRAY` (and `COMPLEX_KEY_HASHED_ARRAY`) layout supports `SHARDS` similarly to `HASHED`. [#57544](https://github.com/ClickHouse/ClickHouse/pull/57544) ([vdimir](https://github.com/vdimir)).
* Add asynchronous metrics for total primary key bytes and total allocated primary key bytes in memory. [#57551](https://github.com/ClickHouse/ClickHouse/pull/57551) ([Bharat Nallan](https://github.com/bharatnc)).
* Table system.dropped_tables_parts contains parts of system.dropped_tables tables (dropped but not yet removed tables). [#57555](https://github.com/ClickHouse/ClickHouse/pull/57555) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Add `FORMAT_BYTES` as an alias for `formatReadableSize`. [#57592](https://github.com/ClickHouse/ClickHouse/pull/57592) ([Bharat Nallan](https://github.com/bharatnc)).
* Add SHA512_256 function. [#57645](https://github.com/ClickHouse/ClickHouse/pull/57645) ([Bharat Nallan](https://github.com/bharatnc)).
* Allow passing optional SESSION_TOKEN to `s3` table function. [#57850](https://github.com/ClickHouse/ClickHouse/pull/57850) ([Shani Elharrar](https://github.com/shanielh)).
* Clause `ORDER BY` now supports specifying `ALL`, meaning that ClickHouse sorts by all columns in the `SELECT` clause. Example: `SELECT col1, col2 FROM tab WHERE [...] ORDER BY ALL`. [#57875](https://github.com/ClickHouse/ClickHouse/pull/57875) ([zhongyuankai](https://github.com/zhongyuankai)).
* Added functions for punycode encoding/decoding: `punycodeEncode()` and `punycodeDecode()`. [#57969](https://github.com/ClickHouse/ClickHouse/pull/57969) ([Robert Schulze](https://github.com/rschu1ze)).
* This PR reproduces the implementation of `PASTE JOIN`, which allows users to join tables without `ON` clause. Example: ``` SQL SELECT * FROM ( SELECT number AS a FROM numbers(2) ) AS t1 PASTE JOIN ( SELECT number AS a FROM numbers(2) ORDER BY a DESC ) AS t2. [#57995](https://github.com/ClickHouse/ClickHouse/pull/57995) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* A handler `/binary` opens a visual viewer of symbols inside the ClickHouse binary. [#58211](https://github.com/ClickHouse/ClickHouse/pull/58211) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Performance Improvement
* Made copy between s3 disks using a s3-server-side copy instead of copying through the buffer. Improves `BACKUP/RESTORE` operations and `clickhouse-disks copy` command. [#56744](https://github.com/ClickHouse/ClickHouse/pull/56744) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* HashJoin respects setting `max_joined_block_size_rows` and do not produce large blocks for `ALL JOIN`. [#56996](https://github.com/ClickHouse/ClickHouse/pull/56996) ([vdimir](https://github.com/vdimir)).
* Release memory for aggregation earlier. This may avoid unnecessary external aggregation. [#57691](https://github.com/ClickHouse/ClickHouse/pull/57691) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Improve performance of string serialization. [#57717](https://github.com/ClickHouse/ClickHouse/pull/57717) ([Maksim Kita](https://github.com/kitaisreal)).
* Support trivial count optimization for `Merge`-engine tables. [#57867](https://github.com/ClickHouse/ClickHouse/pull/57867) ([skyoct](https://github.com/skyoct)).
* Optimized aggregation in some cases. [#57872](https://github.com/ClickHouse/ClickHouse/pull/57872) ([Anton Popov](https://github.com/CurtizJ)).
* The `hasAny()` function can now take advantage of the full-text skipping indices. [#57878](https://github.com/ClickHouse/ClickHouse/pull/57878) ([Jpnock](https://github.com/Jpnock)).
* Function `if(cond, then, else)` (and its alias `cond ? : then : else`) were optimized to use branch-free evaluation. [#57885](https://github.com/ClickHouse/ClickHouse/pull/57885) ([zhanglistar](https://github.com/zhanglistar)).
* Extract non intersecting parts ranges from MergeTree table during FINAL processing. That way we can avoid additional FINAL logic for this non intersecting parts ranges. In case when amount of duplicate values with same primary key is low, performance will be almost the same as without FINAL. Improve reading performance for MergeTree FINAL when `do_not_merge_across_partitions_select_final` setting is set. [#58120](https://github.com/ClickHouse/ClickHouse/pull/58120) ([Maksim Kita](https://github.com/kitaisreal)).
* MergeTree automatically derive `do_not_merge_across_partitions_select_final` setting if partition key expression contains only columns from primary key expression. [#58218](https://github.com/ClickHouse/ClickHouse/pull/58218) ([Maksim Kita](https://github.com/kitaisreal)).
* Speedup MIN and MAX for native types. [#58231](https://github.com/ClickHouse/ClickHouse/pull/58231) ([Raúl Marín](https://github.com/Algunenano)).
#### Improvement
* Make inserts into distributed tables handle updated cluster configuration properly. When the list of cluster nodes is dynamically updated, the Directory Monitor of the distribution table cannot sense the new node, and the Directory Monitor must be re-noded to sense it. [#42826](https://github.com/ClickHouse/ClickHouse/pull/42826) ([zhongyuankai](https://github.com/zhongyuankai)).
* Replace --no-system-tables with loading virtual tables of system database lazily. [#55271](https://github.com/ClickHouse/ClickHouse/pull/55271) ([Azat Khuzhin](https://github.com/azat)).
* Clickhouse-test print case sn, current time and case name in one test case. [#55710](https://github.com/ClickHouse/ClickHouse/pull/55710) ([guoxiaolong](https://github.com/guoxiaolongzte)).
* Do not allow creating replicated table with inconsistent merge params. [#56833](https://github.com/ClickHouse/ClickHouse/pull/56833) ([Duc Canh Le](https://github.com/canhld94)).
* Implement SLRU cache policy for filesystem cache. [#57076](https://github.com/ClickHouse/ClickHouse/pull/57076) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Show uncompressed size in `system.tables`, obtained from data parts' checksums [#56618](https://github.com/ClickHouse/ClickHouse/issues/56618). [#57186](https://github.com/ClickHouse/ClickHouse/pull/57186) ([Chen Lixiang](https://github.com/chenlx0)).
* Add `skip_unavailable_shards` as a setting for `Distributed` tables that is similar to the corresponding query-level setting. Closes [#43666](https://github.com/ClickHouse/ClickHouse/issues/43666). [#57218](https://github.com/ClickHouse/ClickHouse/pull/57218) ([Gagan Goel](https://github.com/tntnatbry)).
* Function `substring()` (aliases: `substr`, `mid`) can now be used with `Enum` types. Previously, the first function argument had to be a value of type `String` or `FixedString`. This improves compatibility with 3rd party tools such as Tableau via MySQL interface. [#57277](https://github.com/ClickHouse/ClickHouse/pull/57277) ([Serge Klochkov](https://github.com/slvrtrn)).
* Better hints when a table doesn't exist. [#57342](https://github.com/ClickHouse/ClickHouse/pull/57342) ([Bharat Nallan](https://github.com/bharatnc)).
* Allow to overwrite `max_partition_size_to_drop` and `max_table_size_to_drop` server settings in query time. [#57452](https://github.com/ClickHouse/ClickHouse/pull/57452) ([Jordi Villar](https://github.com/jrdi)).
* Add support for read-only flag when connecting to the ZooKeeper server (fixes [#53749](https://github.com/ClickHouse/ClickHouse/issues/53749)). [#57479](https://github.com/ClickHouse/ClickHouse/pull/57479) ([Mikhail Koviazin](https://github.com/mkmkme)).
* Fix possible distributed sends stuck due to "No such file or directory" (during recovering batch from disk). Fix possible issues with `error_count` from `system.distribution_queue` (in case of `distributed_directory_monitor_max_sleep_time_ms` >5min). Introduce profile event to track async INSERT failures - `DistributedAsyncInsertionFailures`. [#57480](https://github.com/ClickHouse/ClickHouse/pull/57480) ([Azat Khuzhin](https://github.com/azat)).
* The limit for the number of connections per endpoint for background fetches was raised from `15` to the value of `background_fetches_pool_size` setting. - MergeTree-level setting `replicated_max_parallel_fetches_for_host` became obsolete - MergeTree-level settings `replicated_fetches_http_connection_timeout`, `replicated_fetches_http_send_timeout` and `replicated_fetches_http_receive_timeout` are moved to the Server-level. - Setting `keep_alive_timeout` is added to the list of Server-level settings. [#57523](https://github.com/ClickHouse/ClickHouse/pull/57523) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* It is now possible to refer to ALIAS column in index (non-primary-key) definitions (issue [#55650](https://github.com/ClickHouse/ClickHouse/issues/55650)). Example: `CREATE TABLE tab(col UInt32, col_alias ALIAS col + 1, INDEX idx (col_alias) TYPE minmax) ENGINE = MergeTree ORDER BY col;`. [#57546](https://github.com/ClickHouse/ClickHouse/pull/57546) ([Robert Schulze](https://github.com/rschu1ze)).
* Function `format()` now supports arbitrary argument types (instead of only `String` and `FixedString` arguments). This is important to calculate `SELECT format('The {0} to all questions is {1}', 'answer', 42)`. [#57549](https://github.com/ClickHouse/ClickHouse/pull/57549) ([Robert Schulze](https://github.com/rschu1ze)).
* Support PostgreSQL generated columns and default column values in `MaterializedPostgreSQL`. Closes [#40449](https://github.com/ClickHouse/ClickHouse/issues/40449). [#57568](https://github.com/ClickHouse/ClickHouse/pull/57568) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Allow to apply some filesystem cache config settings changes without server restart. [#57578](https://github.com/ClickHouse/ClickHouse/pull/57578) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Handle sigabrt case when getting PostgreSQl table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot (Михаил Кот)](https://github.com/myrrc)).
* Allows to use the `date_trunc()` function with the first argument not depending on the case of it. Both cases are now supported: `SELECT date_trunc('day', now())` and `SELECT date_trunc('DAY', now())`. [#57624](https://github.com/ClickHouse/ClickHouse/pull/57624) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Expose the total number of errors occurred since last server as a `ClickHouseErrorMetric_ALL` metric. [#57627](https://github.com/ClickHouse/ClickHouse/pull/57627) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Allow nodes in config with from_env/from_zk and non empty element with replace=1. [#57628](https://github.com/ClickHouse/ClickHouse/pull/57628) ([Azat Khuzhin](https://github.com/azat)).
* Generate malformed output that cannot be parsed as JSON. [#57646](https://github.com/ClickHouse/ClickHouse/pull/57646) ([Julia Kartseva](https://github.com/jkartseva)).
* Consider lightweight deleted rows when selecting parts to merge if enabled. [#57648](https://github.com/ClickHouse/ClickHouse/pull/57648) ([Zhuo Qiu](https://github.com/jewelzqiu)).
* Make querying system.filesystem_cache not memory intensive. [#57687](https://github.com/ClickHouse/ClickHouse/pull/57687) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Allow IPv6 to UInt128 conversion and binary arithmetic. [#57707](https://github.com/ClickHouse/ClickHouse/pull/57707) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Support negative positional arguments. Closes [#57736](https://github.com/ClickHouse/ClickHouse/issues/57736). [#57741](https://github.com/ClickHouse/ClickHouse/pull/57741) ([flynn](https://github.com/ucasfl)).
* Add a setting for `async inserts deduplication cache` -- how long we wait for cache update. Deprecate setting `async_block_ids_cache_min_update_interval_ms`. Now cache is updated only in case of conflicts. [#57743](https://github.com/ClickHouse/ClickHouse/pull/57743) ([alesapin](https://github.com/alesapin)).
* `sleep()` function now can be cancelled with `KILL QUERY`. [#57746](https://github.com/ClickHouse/ClickHouse/pull/57746) ([Vitaly Baranov](https://github.com/vitlibar)).
* Slightly better inference of unnamed tupes in JSON formats. [#57751](https://github.com/ClickHouse/ClickHouse/pull/57751) ([Kruglov Pavel](https://github.com/Avogar)).
* Refactor UserDefinedSQL* classes to make it possible to add SQL UDF storages which are different from ZooKeeper and Disk. [#57752](https://github.com/ClickHouse/ClickHouse/pull/57752) ([Natasha Chizhonkova](https://github.com/chizhonkova)).
* Forbid `CREATE TABLE ... AS SELECT` queries for Replicated table engines in Replicated database because they are broken. Reference [#35408](https://github.com/ClickHouse/ClickHouse/issues/35408). [#57796](https://github.com/ClickHouse/ClickHouse/pull/57796) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix and improve transform query for external database, we should recursively obtain all compatible predicates. [#57888](https://github.com/ClickHouse/ClickHouse/pull/57888) ([flynn](https://github.com/ucasfl)).
* Support dynamic reloading of filesystem cache size. Closes [#57866](https://github.com/ClickHouse/ClickHouse/issues/57866). [#57897](https://github.com/ClickHouse/ClickHouse/pull/57897) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix system.stack_trace for threads with blocked SIGRTMIN. [#57907](https://github.com/ClickHouse/ClickHouse/pull/57907) ([Azat Khuzhin](https://github.com/azat)).
* Added a new setting `readonly` which can be used to specify a s3 disk is read only. It can be useful to create a table with read only `s3_plain` type disk. [#57977](https://github.com/ClickHouse/ClickHouse/pull/57977) ([Pengyuan Bian](https://github.com/bianpengyuan)).
* Support keeper failures in quorum check. [#57986](https://github.com/ClickHouse/ClickHouse/pull/57986) ([Raúl Marín](https://github.com/Algunenano)).
* Add max/peak RSS (`MemoryResidentMax`) into system.asynchronous_metrics. [#58095](https://github.com/ClickHouse/ClickHouse/pull/58095) ([Azat Khuzhin](https://github.com/azat)).
* Fix system.stack_trace for threads with blocked SIGRTMIN (and also send signal to the threads only if it is not blocked to avoid waiting `storage_system_stack_trace_pipe_read_timeout_ms` when it does not make any sense). [#58136](https://github.com/ClickHouse/ClickHouse/pull/58136) ([Azat Khuzhin](https://github.com/azat)).
* This PR allows users to use s3 links (`https://` and `s3://`) without mentioning region if it's not default. Also find the correct region if the user mentioned the wrong one. ### Documentation entry for user-facing changes. [#58148](https://github.com/ClickHouse/ClickHouse/pull/58148) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* `clickhouse-format --obfuscate` will know about Settings, MergeTreeSettings, and time zones and keep their names unchanged. [#58179](https://github.com/ClickHouse/ClickHouse/pull/58179) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Added explicit `finalize()` function in `ZipArchiveWriter`. Simplify too complicated code in `ZipArchiveWriter`. This PR fixes [#58074](https://github.com/ClickHouse/ClickHouse/issues/58074). [#58202](https://github.com/ClickHouse/ClickHouse/pull/58202) ([Vitaly Baranov](https://github.com/vitlibar)).
* The primary key analysis in MergeTree tables will now be applied to predicates that include the virtual column `_part_offset` (optionally with `_part`). This feature can serve as a poor man's secondary index. [#58224](https://github.com/ClickHouse/ClickHouse/pull/58224) ([Amos Bird](https://github.com/amosbird)).
* Make caches with the same path use the same cache objects. This behaviour existed before, but was broken in https://github.com/ClickHouse/ClickHouse/pull/48805 (in 23.4). If such caches with the same path have different set of cache settings, an exception will be thrown, that this is not allowed. [#58264](https://github.com/ClickHouse/ClickHouse/pull/58264) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### Build/Testing/Packaging Improvement
* Allow usage of Azure-related table engines/functions on macOS. [#51866](https://github.com/ClickHouse/ClickHouse/pull/51866) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* ClickHouse Fast Test now uses Musl instead of GLibc. [#57711](https://github.com/ClickHouse/ClickHouse/pull/57711) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Run ClickBench for every commit. This closes [#57708](https://github.com/ClickHouse/ClickHouse/issues/57708). [#57712](https://github.com/ClickHouse/ClickHouse/pull/57712) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fixed a sorting order breakage in TTL GROUP BY [#49103](https://github.com/ClickHouse/ClickHouse/pull/49103) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* fix: split lttb bucket strategy, first bucket and last bucket should only contain single point [#57003](https://github.com/ClickHouse/ClickHouse/pull/57003) ([FFish](https://github.com/wxybear)).
* Fix possible deadlock in Template format during sync after error [#57004](https://github.com/ClickHouse/ClickHouse/pull/57004) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix early stop while parsing file with skipping lots of errors [#57006](https://github.com/ClickHouse/ClickHouse/pull/57006) ([Kruglov Pavel](https://github.com/Avogar)).
* Prevent dictionary's ACL bypass via dictionary() table function [#57362](https://github.com/ClickHouse/ClickHouse/pull/57362) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Fix another case of non-ready set. [#57423](https://github.com/ClickHouse/ClickHouse/pull/57423) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix several issues regarding PostgreSQL `array_ndims` usage. [#57436](https://github.com/ClickHouse/ClickHouse/pull/57436) ([Ryan Jacobs](https://github.com/ryanmjacobs)).
* Fix RWLock inconsistency after write lock timeout [#57454](https://github.com/ClickHouse/ClickHouse/pull/57454) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix: don't exclude ephemeral column when building pushing to view chain [#57461](https://github.com/ClickHouse/ClickHouse/pull/57461) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* MaterializedPostgreSQL: fix issue [#41922](https://github.com/ClickHouse/ClickHouse/issues/41922), add test for [#41923](https://github.com/ClickHouse/ClickHouse/issues/41923) [#57515](https://github.com/ClickHouse/ClickHouse/pull/57515) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities. [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Fix crash in clickhouse-local [#57553](https://github.com/ClickHouse/ClickHouse/pull/57553) ([Nikolay Degterinsky](https://github.com/evillique)).
* Materialize block in HashJoin for Type::EMPTY [#57564](https://github.com/ClickHouse/ClickHouse/pull/57564) ([vdimir](https://github.com/vdimir)).
* Fix possible segfault in PostgreSQLSource [#57567](https://github.com/ClickHouse/ClickHouse/pull/57567) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix type correction in HashJoin for nested low cardinality [#57614](https://github.com/ClickHouse/ClickHouse/pull/57614) ([vdimir](https://github.com/vdimir)).
* Avoid hangs of system.stack_trace by correctly prohibit parallel read from it [#57641](https://github.com/ClickHouse/ClickHouse/pull/57641) ([Azat Khuzhin](https://github.com/azat)).
* Fix SIGSEGV for aggregation of sparse columns with any() RESPECT NULL [#57710](https://github.com/ClickHouse/ClickHouse/pull/57710) ([Azat Khuzhin](https://github.com/azat)).
* Fix unary operators parsing [#57713](https://github.com/ClickHouse/ClickHouse/pull/57713) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix RWLock inconsistency after write lock timeout (again) [#57733](https://github.com/ClickHouse/ClickHouse/pull/57733) ([Vitaly Baranov](https://github.com/vitlibar)).
* Table engine MaterializedPostgreSQL fix dependency loading [#57754](https://github.com/ClickHouse/ClickHouse/pull/57754) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix retries for disconnected nodes for BACKUP/RESTORE ON CLUSTER [#57764](https://github.com/ClickHouse/ClickHouse/pull/57764) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix bug window functions: revert [#39631](https://github.com/ClickHouse/ClickHouse/issues/39631) [#57766](https://github.com/ClickHouse/ClickHouse/pull/57766) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix result of external aggregation in case of partially materialized projection [#57790](https://github.com/ClickHouse/ClickHouse/pull/57790) ([Anton Popov](https://github.com/CurtizJ)).
* Fix merge in aggregation functions with `*Map` combinator [#57795](https://github.com/ClickHouse/ClickHouse/pull/57795) ([Anton Popov](https://github.com/CurtizJ)).
* Disable system.kafka_consumers by default (due to possible live memory leak) [#57822](https://github.com/ClickHouse/ClickHouse/pull/57822) ([Azat Khuzhin](https://github.com/azat)).
* Fix low-cardinality keys support in MergeJoin [#57827](https://github.com/ClickHouse/ClickHouse/pull/57827) ([vdimir](https://github.com/vdimir)).
* Create consumers for Kafka tables on fly (but keep them for some period since last used) [#57829](https://github.com/ClickHouse/ClickHouse/pull/57829) ([Azat Khuzhin](https://github.com/azat)).
* InterpreterCreateQuery sample block fix [#57855](https://github.com/ClickHouse/ClickHouse/pull/57855) ([Maksim Kita](https://github.com/kitaisreal)).
* bugfix: addresses_expr ignored for psql named collections [#57874](https://github.com/ClickHouse/ClickHouse/pull/57874) ([joelynch](https://github.com/joelynch)).
* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)).
* Resurrect `arrayFold()` [#57879](https://github.com/ClickHouse/ClickHouse/pull/57879) ([Robert Schulze](https://github.com/rschu1ze)).
* Normalize function names in CREATE INDEX [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix handling of unavailable replicas before first request happened [#57933](https://github.com/ClickHouse/ClickHouse/pull/57933) ([Nikita Taranov](https://github.com/nickitat)).
* Fix literal alias misclassification [#57988](https://github.com/ClickHouse/ClickHouse/pull/57988) ([Chen768959](https://github.com/Chen768959)).
* Revert "Fix bug window functions: revert [#39631](https://github.com/ClickHouse/ClickHouse/issues/39631)" [#58031](https://github.com/ClickHouse/ClickHouse/pull/58031) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix Integer overflow in Poco::UTF32Encoding [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)).
* Fix parallel replicas in presence of a scalar subquery with a big integer value [#58118](https://github.com/ClickHouse/ClickHouse/pull/58118) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `accurateCastOrNull` for out-of-range DateTime [#58139](https://github.com/ClickHouse/ClickHouse/pull/58139) ([Andrey Zvonov](https://github.com/zvonand)).
* Fix possible PARAMETER_OUT_OF_BOUND error during subcolumns reading from wide part in MergeTree [#58175](https://github.com/ClickHouse/ClickHouse/pull/58175) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* fix CREATE VIEW hang [#58220](https://github.com/ClickHouse/ClickHouse/pull/58220) ([Tao Wang](https://github.com/wangtZJU)).
* Fix parallel parsing for JSONCompactEachRow [#58250](https://github.com/ClickHouse/ClickHouse/pull/58250) ([Kruglov Pavel](https://github.com/Avogar)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Revert "Update Sentry""'. [#57694](https://github.com/ClickHouse/ClickHouse/pull/57694) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Fix RWLock inconsistency after write lock timeout"'. [#57730](https://github.com/ClickHouse/ClickHouse/pull/57730) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "improve CI with digest for docker, build and test jobs"'. [#57903](https://github.com/ClickHouse/ClickHouse/pull/57903) ([Max K.](https://github.com/mkaynov)).
* NO CL ENTRY: 'Reapply "improve CI with digest for docker, build and test jobs"'. [#57904](https://github.com/ClickHouse/ClickHouse/pull/57904) ([Max K.](https://github.com/mkaynov)).
* NO CL ENTRY: 'Revert "Merge pull request [#56573](https://github.com/ClickHouse/ClickHouse/issues/56573) from mkmkme/mkmkme/reload-config"'. [#57909](https://github.com/ClickHouse/ClickHouse/pull/57909) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Add system.dropped_tables_parts table"'. [#58022](https://github.com/ClickHouse/ClickHouse/pull/58022) ([Antonio Andelic](https://github.com/antonio2368)).
* NO CL ENTRY: 'Revert "Consider lightweight deleted rows when selecting parts to merge"'. [#58097](https://github.com/ClickHouse/ClickHouse/pull/58097) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Fix leftover processes/hangs in tests"'. [#58207](https://github.com/ClickHouse/ClickHouse/pull/58207) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Create consumers for Kafka tables on fly (but keep them for some period since last used)"'. [#58272](https://github.com/ClickHouse/ClickHouse/pull/58272) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Implement punycode encoding/decoding"'. [#58277](https://github.com/ClickHouse/ClickHouse/pull/58277) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Randomize more settings [#39663](https://github.com/ClickHouse/ClickHouse/pull/39663) ([Anton Popov](https://github.com/CurtizJ)).
* Add more tests for `compile_expressions` [#51113](https://github.com/ClickHouse/ClickHouse/pull/51113) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* [RFC] Correctly wait background threads [#52717](https://github.com/ClickHouse/ClickHouse/pull/52717) ([Azat Khuzhin](https://github.com/azat)).
* improve CI with digest for docker, build and test jobs [#56317](https://github.com/ClickHouse/ClickHouse/pull/56317) ([Max K.](https://github.com/mkaynov)).
* Prepare the introduction of more keeper faults [#56917](https://github.com/ClickHouse/ClickHouse/pull/56917) ([Raúl Marín](https://github.com/Algunenano)).
* Analyzer: Fix assert in tryReplaceAndEqualsChainsWithConstant [#57139](https://github.com/ClickHouse/ClickHouse/pull/57139) ([vdimir](https://github.com/vdimir)).
* Check what will happen if we build ClickHouse with Musl [#57180](https://github.com/ClickHouse/ClickHouse/pull/57180) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* support memory soft limit for keeper [#57271](https://github.com/ClickHouse/ClickHouse/pull/57271) ([Han Fei](https://github.com/hanfei1991)).
* Randomize disabled optimizations in CI [#57315](https://github.com/ClickHouse/ClickHouse/pull/57315) ([Raúl Marín](https://github.com/Algunenano)).
* Don't throw if noop when dropping database replica in batch [#57337](https://github.com/ClickHouse/ClickHouse/pull/57337) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Better JSON -> JSONEachRow fallback without catching exceptions [#57364](https://github.com/ClickHouse/ClickHouse/pull/57364) ([Kruglov Pavel](https://github.com/Avogar)).
* Add tests for [#48496](https://github.com/ClickHouse/ClickHouse/issues/48496) [#57414](https://github.com/ClickHouse/ClickHouse/pull/57414) ([Raúl Marín](https://github.com/Algunenano)).
* Add profile event for cache lookup in `ThreadPoolRemoteFSReader` [#57437](https://github.com/ClickHouse/ClickHouse/pull/57437) ([Nikita Taranov](https://github.com/nickitat)).
* Remove select() usage [#57467](https://github.com/ClickHouse/ClickHouse/pull/57467) ([Igor Nikonov](https://github.com/devcrafter)).
* Parallel replicas: friendly settings [#57542](https://github.com/ClickHouse/ClickHouse/pull/57542) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix formatting string prompt error [#57569](https://github.com/ClickHouse/ClickHouse/pull/57569) ([skyoct](https://github.com/skyoct)).
* Tune CI scale up/down multipliers [#57572](https://github.com/ClickHouse/ClickHouse/pull/57572) ([Max K.](https://github.com/mkaynov)).
* Revert "Revert "Implemented series period detect method using pocketfft lib"" [#57574](https://github.com/ClickHouse/ClickHouse/pull/57574) ([Bhavna Jindal](https://github.com/bhavnajindal)).
* Correctly handle errors during opening query in editor in client [#57587](https://github.com/ClickHouse/ClickHouse/pull/57587) ([Azat Khuzhin](https://github.com/azat)).
* Add a test for [#55251](https://github.com/ClickHouse/ClickHouse/issues/55251) [#57588](https://github.com/ClickHouse/ClickHouse/pull/57588) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add a test for [#48039](https://github.com/ClickHouse/ClickHouse/issues/48039) [#57593](https://github.com/ClickHouse/ClickHouse/pull/57593) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Update CHANGELOG.md [#57594](https://github.com/ClickHouse/ClickHouse/pull/57594) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update version after release [#57595](https://github.com/ClickHouse/ClickHouse/pull/57595) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update version_date.tsv and changelogs after v23.11.1.2711-stable [#57597](https://github.com/ClickHouse/ClickHouse/pull/57597) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Identify failed jobs in lambda and mark as steps=0 [#57600](https://github.com/ClickHouse/ClickHouse/pull/57600) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix flaky test: distinct in order with analyzer [#57606](https://github.com/ClickHouse/ClickHouse/pull/57606) ([Igor Nikonov](https://github.com/devcrafter)).
* CHJIT add assembly printer [#57610](https://github.com/ClickHouse/ClickHouse/pull/57610) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix parsing virtual hosted S3 URI in clickhouse_backupview script [#57612](https://github.com/ClickHouse/ClickHouse/pull/57612) ([Daniel Pozo Escalona](https://github.com/danipozo)).
* Fix docs for `fileCluster` [#57613](https://github.com/ClickHouse/ClickHouse/pull/57613) ([Andrey Zvonov](https://github.com/zvonand)).
* Analyzer: Fix logical error in MultiIfToIfPass [#57622](https://github.com/ClickHouse/ClickHouse/pull/57622) ([vdimir](https://github.com/vdimir)).
* Throw more clear exception [#57626](https://github.com/ClickHouse/ClickHouse/pull/57626) ([alesapin](https://github.com/alesapin)).
* Fix "logs and exception messages formatting", part 1 [#57630](https://github.com/ClickHouse/ClickHouse/pull/57630) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix "logs and exception messages formatting", part 2 [#57632](https://github.com/ClickHouse/ClickHouse/pull/57632) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix "logs and exception messages formatting", part 3 [#57633](https://github.com/ClickHouse/ClickHouse/pull/57633) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix "logs and exception messages formatting", part 4 [#57634](https://github.com/ClickHouse/ClickHouse/pull/57634) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove bad test (1) [#57636](https://github.com/ClickHouse/ClickHouse/pull/57636) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove bad test (2) [#57637](https://github.com/ClickHouse/ClickHouse/pull/57637) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* ClickHouse Cloud promotion [#57638](https://github.com/ClickHouse/ClickHouse/pull/57638) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Remove bad test (3) [#57639](https://github.com/ClickHouse/ClickHouse/pull/57639) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove bad test (4) [#57640](https://github.com/ClickHouse/ClickHouse/pull/57640) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Random changes in random files [#57642](https://github.com/ClickHouse/ClickHouse/pull/57642) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Merge half of [#51113](https://github.com/ClickHouse/ClickHouse/issues/51113) [#57643](https://github.com/ClickHouse/ClickHouse/pull/57643) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Analyzer: Fix JOIN ON true with join_use_nulls [#57662](https://github.com/ClickHouse/ClickHouse/pull/57662) ([vdimir](https://github.com/vdimir)).
* Pin alpine version of integration tests helper container [#57669](https://github.com/ClickHouse/ClickHouse/pull/57669) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add support for system.stack_trace filtering optimizations for analyzer [#57682](https://github.com/ClickHouse/ClickHouse/pull/57682) ([Azat Khuzhin](https://github.com/azat)).
* test for [#33308](https://github.com/ClickHouse/ClickHouse/issues/33308) [#57693](https://github.com/ClickHouse/ClickHouse/pull/57693) ([Denny Crane](https://github.com/den-crane)).
* support keeper memory soft limit ratio [#57699](https://github.com/ClickHouse/ClickHouse/pull/57699) ([Han Fei](https://github.com/hanfei1991)).
* Fix test_dictionaries_update_and_reload/test.py::test_reload_while_loading flakiness [#57714](https://github.com/ClickHouse/ClickHouse/pull/57714) ([Azat Khuzhin](https://github.com/azat)).
* Tune autoscale to scale for single job in the queue [#57742](https://github.com/ClickHouse/ClickHouse/pull/57742) ([Max K.](https://github.com/mkaynov)).
* Tune network memory for dockerhub proxy hosts [#57744](https://github.com/ClickHouse/ClickHouse/pull/57744) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Parallel replicas: announcement response handling improvement [#57749](https://github.com/ClickHouse/ClickHouse/pull/57749) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix building Rust with Musl [#57756](https://github.com/ClickHouse/ClickHouse/pull/57756) ([Azat Khuzhin](https://github.com/azat)).
* Fix flaky test_parallel_replicas_distributed_read_from_all [#57757](https://github.com/ClickHouse/ClickHouse/pull/57757) ([Igor Nikonov](https://github.com/devcrafter)).
* Minor refactoring of toStartOfInterval() [#57761](https://github.com/ClickHouse/ClickHouse/pull/57761) ([Robert Schulze](https://github.com/rschu1ze)).
* Don't run test 02919_skip_lots_of_parsing_errors on aarch64 [#57762](https://github.com/ClickHouse/ClickHouse/pull/57762) ([Kruglov Pavel](https://github.com/Avogar)).
* More respect to `min_number_of_marks` in `ParallelReplicasReadingCoordinator` [#57763](https://github.com/ClickHouse/ClickHouse/pull/57763) ([Nikita Taranov](https://github.com/nickitat)).
* SerializationString reduce memory usage [#57787](https://github.com/ClickHouse/ClickHouse/pull/57787) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix ThreadSanitizer data race in librdkafka [#57791](https://github.com/ClickHouse/ClickHouse/pull/57791) ([Ilya Golshtein](https://github.com/ilejn)).
* Rename `system.async_loader` into `system.asynchronous_loader` [#57793](https://github.com/ClickHouse/ClickHouse/pull/57793) ([Sergei Trifonov](https://github.com/serxa)).
* Set replica number to its position in cluster definition [#57800](https://github.com/ClickHouse/ClickHouse/pull/57800) ([Nikita Taranov](https://github.com/nickitat)).
* fix clickhouse-client invocation in 02327_capnproto_protobuf_empty_messages [#57804](https://github.com/ClickHouse/ClickHouse/pull/57804) ([Mikhail Koviazin](https://github.com/mkmkme)).
* Fix flaky test_parallel_replicas_over_distributed [#57809](https://github.com/ClickHouse/ClickHouse/pull/57809) ([Igor Nikonov](https://github.com/devcrafter)).
* Revert [#57741](https://github.com/ClickHouse/ClickHouse/issues/57741) [#57811](https://github.com/ClickHouse/ClickHouse/pull/57811) ([Raúl Marín](https://github.com/Algunenano)).
* Dumb down `substring()` tests [#57821](https://github.com/ClickHouse/ClickHouse/pull/57821) ([Robert Schulze](https://github.com/rschu1ze)).
* Update version_date.tsv and changelogs after v23.11.2.11-stable [#57824](https://github.com/ClickHouse/ClickHouse/pull/57824) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix 02906_force_optimize_projection_name [#57826](https://github.com/ClickHouse/ClickHouse/pull/57826) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* ClickBench: slightly better [#57831](https://github.com/ClickHouse/ClickHouse/pull/57831) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix 02932_kill_query_sleep flakiness [#57849](https://github.com/ClickHouse/ClickHouse/pull/57849) ([Azat Khuzhin](https://github.com/azat)).
* Revert "Replace --no-system-tables with loading virtual tables of system database lazily" [#57851](https://github.com/ClickHouse/ClickHouse/pull/57851) ([Azat Khuzhin](https://github.com/azat)).
* Fix memory leak in StorageHDFS [#57860](https://github.com/ClickHouse/ClickHouse/pull/57860) ([Andrey Zvonov](https://github.com/zvonand)).
* Remove hardcoded clickhouse-client invocations from tests [#57861](https://github.com/ClickHouse/ClickHouse/pull/57861) ([Mikhail Koviazin](https://github.com/mkmkme)).
* Follow up to [#57568](https://github.com/ClickHouse/ClickHouse/issues/57568) [#57863](https://github.com/ClickHouse/ClickHouse/pull/57863) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix assertion in HashJoin [#57873](https://github.com/ClickHouse/ClickHouse/pull/57873) ([vdimir](https://github.com/vdimir)).
* More efficient constructor for SerializationEnum [#57887](https://github.com/ClickHouse/ClickHouse/pull/57887) ([Duc Canh Le](https://github.com/canhld94)).
* Fix test_unset_skip_unavailable_shards [#57895](https://github.com/ClickHouse/ClickHouse/pull/57895) ([Raúl Marín](https://github.com/Algunenano)).
* Add argument to fill the gap in cherry-pick [#57896](https://github.com/ClickHouse/ClickHouse/pull/57896) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Delete debug logging in OutputFormatWithUTF8ValidationAdaptor [#57899](https://github.com/ClickHouse/ClickHouse/pull/57899) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove heavy rust stable toolchain [#57905](https://github.com/ClickHouse/ClickHouse/pull/57905) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Improvements for 00002_log_and_exception_messages_formatting [#57910](https://github.com/ClickHouse/ClickHouse/pull/57910) ([Raúl Marín](https://github.com/Algunenano)).
* Update CHANGELOG.md [#57911](https://github.com/ClickHouse/ClickHouse/pull/57911) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* remove cruft from TablesLoader [#57938](https://github.com/ClickHouse/ClickHouse/pull/57938) ([Bharat Nallan](https://github.com/bharatnc)).
* Fix `/dashboard` work with passwords [#57948](https://github.com/ClickHouse/ClickHouse/pull/57948) ([Sergei Trifonov](https://github.com/serxa)).
* Remove wrong test [#57950](https://github.com/ClickHouse/ClickHouse/pull/57950) ([Sergei Trifonov](https://github.com/serxa)).
* Fix docker image for integration tests (fixes CI) [#57952](https://github.com/ClickHouse/ClickHouse/pull/57952) ([Azat Khuzhin](https://github.com/azat)).
* Remove C++ templates (normalizeQuery) [#57963](https://github.com/ClickHouse/ClickHouse/pull/57963) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* A small fix for dashboard [#57964](https://github.com/ClickHouse/ClickHouse/pull/57964) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Always use `pread` for reading cache segments [#57970](https://github.com/ClickHouse/ClickHouse/pull/57970) ([Nikita Taranov](https://github.com/nickitat)).
* Improve some tests [#57973](https://github.com/ClickHouse/ClickHouse/pull/57973) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Revert "Merge pull request [#57907](https://github.com/ClickHouse/ClickHouse/issues/57907) from azat/system.stack_trace-rt_tgsigqueueinfo" [#57974](https://github.com/ClickHouse/ClickHouse/pull/57974) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a test for [#49708](https://github.com/ClickHouse/ClickHouse/issues/49708) [#57979](https://github.com/ClickHouse/ClickHouse/pull/57979) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix style-check checkout head-ref [#57989](https://github.com/ClickHouse/ClickHouse/pull/57989) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* refine error message [#57991](https://github.com/ClickHouse/ClickHouse/pull/57991) ([Han Fei](https://github.com/hanfei1991)).
* CI for docs only fix [#57992](https://github.com/ClickHouse/ClickHouse/pull/57992) ([Max K.](https://github.com/mkaynov)).
* Replace rust's BLAKE3 with llvm's implementation [#57994](https://github.com/ClickHouse/ClickHouse/pull/57994) ([Raúl Marín](https://github.com/Algunenano)).
* Better trivial count optimization for storage `Merge` [#57996](https://github.com/ClickHouse/ClickHouse/pull/57996) ([Anton Popov](https://github.com/CurtizJ)).
* enhanced docs for `date_trunc()` [#58000](https://github.com/ClickHouse/ClickHouse/pull/58000) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* CI: add needs_changed_files flag for pr_info [#58003](https://github.com/ClickHouse/ClickHouse/pull/58003) ([Max K.](https://github.com/mkaynov)).
* more messages in ci [#58007](https://github.com/ClickHouse/ClickHouse/pull/58007) ([Sema Checherinda](https://github.com/CheSema)).
* Test parallel replicas with force_primary_key setting [#58010](https://github.com/ClickHouse/ClickHouse/pull/58010) ([Igor Nikonov](https://github.com/devcrafter)).
* Update 00002_log_and_exception_messages_formatting.sql [#58012](https://github.com/ClickHouse/ClickHouse/pull/58012) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix rare race in external sort/aggregation with temporary data in cache [#58013](https://github.com/ClickHouse/ClickHouse/pull/58013) ([Anton Popov](https://github.com/CurtizJ)).
* Fix segfault in FuzzJSON engine [#58015](https://github.com/ClickHouse/ClickHouse/pull/58015) ([Julia Kartseva](https://github.com/jkartseva)).
* fix freebsd build [#58019](https://github.com/ClickHouse/ClickHouse/pull/58019) ([Julia Kartseva](https://github.com/jkartseva)).
* Rename canUseParallelReplicas to canUseTaskBasedParallelReplicas [#58025](https://github.com/ClickHouse/ClickHouse/pull/58025) ([Raúl Marín](https://github.com/Algunenano)).
* Remove fixed tests from analyzer_tech_debt.txt [#58028](https://github.com/ClickHouse/ClickHouse/pull/58028) ([Raúl Marín](https://github.com/Algunenano)).
* More verbose errors on 00002_log_and_exception_messages_formatting [#58037](https://github.com/ClickHouse/ClickHouse/pull/58037) ([Raúl Marín](https://github.com/Algunenano)).
* Make window insert result into constant [#58045](https://github.com/ClickHouse/ClickHouse/pull/58045) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* CI: Happy new year [#58046](https://github.com/ClickHouse/ClickHouse/pull/58046) ([Raúl Marín](https://github.com/Algunenano)).
* Follow up for [#57691](https://github.com/ClickHouse/ClickHouse/issues/57691) [#58048](https://github.com/ClickHouse/ClickHouse/pull/58048) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* always run ast_fuzz and sqllancer [#58049](https://github.com/ClickHouse/ClickHouse/pull/58049) ([Max K.](https://github.com/mkaynov)).
* Add GH status for PR formating [#58050](https://github.com/ClickHouse/ClickHouse/pull/58050) ([Max K.](https://github.com/mkaynov)).
* Small improvement for SystemLogBase [#58051](https://github.com/ClickHouse/ClickHouse/pull/58051) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Bump Azure to v1.6.0 [#58052](https://github.com/ClickHouse/ClickHouse/pull/58052) ([Robert Schulze](https://github.com/rschu1ze)).
* Correct values for randomization [#58058](https://github.com/ClickHouse/ClickHouse/pull/58058) ([Anton Popov](https://github.com/CurtizJ)).
* Non post request should be readonly [#58060](https://github.com/ClickHouse/ClickHouse/pull/58060) ([San](https://github.com/santrancisco)).
* Revert "Merge pull request [#55710](https://github.com/ClickHouse/ClickHouse/issues/55710) from guoxiaolongzte/clickhouse-test… [#58066](https://github.com/ClickHouse/ClickHouse/pull/58066) ([Raúl Marín](https://github.com/Algunenano)).
* fix typo in the test 02479 [#58072](https://github.com/ClickHouse/ClickHouse/pull/58072) ([Sema Checherinda](https://github.com/CheSema)).
* Bump Azure to 1.7.2 [#58075](https://github.com/ClickHouse/ClickHouse/pull/58075) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix flaky test `02567_and_consistency` [#58076](https://github.com/ClickHouse/ClickHouse/pull/58076) ([Anton Popov](https://github.com/CurtizJ)).
* Fix Tests Bugfix Validate Check [#58078](https://github.com/ClickHouse/ClickHouse/pull/58078) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix for nightly job for digest-ci [#58079](https://github.com/ClickHouse/ClickHouse/pull/58079) ([Max K.](https://github.com/mkaynov)).
* Test for parallel replicas with remote() [#58081](https://github.com/ClickHouse/ClickHouse/pull/58081) ([Igor Nikonov](https://github.com/devcrafter)).
* Minor cosmetic changes [#58092](https://github.com/ClickHouse/ClickHouse/pull/58092) ([Raúl Marín](https://github.com/Algunenano)).
* Reintroduce OPTIMIZE CLEANUP as no-op [#58100](https://github.com/ClickHouse/ClickHouse/pull/58100) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add compatibility in the replication protocol for a removed feature [#58104](https://github.com/ClickHouse/ClickHouse/pull/58104) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Flaky 02922_analyzer_aggregate_nothing_type [#58105](https://github.com/ClickHouse/ClickHouse/pull/58105) ([Raúl Marín](https://github.com/Algunenano)).
* Update version_date.tsv and changelogs after v23.11.3.23-stable [#58106](https://github.com/ClickHouse/ClickHouse/pull/58106) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Limited CI on the master for docs only change [#58121](https://github.com/ClickHouse/ClickHouse/pull/58121) ([Max K.](https://github.com/mkaynov)).
* style fix [#58125](https://github.com/ClickHouse/ClickHouse/pull/58125) ([Max K.](https://github.com/mkaynov)).
* Support "do not test" label with ci.py [#58128](https://github.com/ClickHouse/ClickHouse/pull/58128) ([Max K.](https://github.com/mkaynov)).
* Use the single images list for integration tests everywhere [#58130](https://github.com/ClickHouse/ClickHouse/pull/58130) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Disable parallel replicas with IN (subquery) [#58133](https://github.com/ClickHouse/ClickHouse/pull/58133) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix clang-tidy [#58134](https://github.com/ClickHouse/ClickHouse/pull/58134) ([Raúl Marín](https://github.com/Algunenano)).
* Run build report check job on build failures, fix [#58135](https://github.com/ClickHouse/ClickHouse/pull/58135) ([Max K.](https://github.com/mkaynov)).
* Fix dashboard legend sorting and rows number [#58151](https://github.com/ClickHouse/ClickHouse/pull/58151) ([Sergei Trifonov](https://github.com/serxa)).
* Remove retryStrategy assignments overwritten in ClientFactory::create() [#58163](https://github.com/ClickHouse/ClickHouse/pull/58163) ([Daniel Pozo Escalona](https://github.com/danipozo)).
* Helper improvements [#58164](https://github.com/ClickHouse/ClickHouse/pull/58164) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Pass through exceptions for reading from S3 [#58165](https://github.com/ClickHouse/ClickHouse/pull/58165) ([Azat Khuzhin](https://github.com/azat)).
* [RFC] Adjust all std::ios implementations in poco to set failbit/badbit by default [#58166](https://github.com/ClickHouse/ClickHouse/pull/58166) ([Azat Khuzhin](https://github.com/azat)).
* Add bytes_uncompressed to system.part_log [#58167](https://github.com/ClickHouse/ClickHouse/pull/58167) ([Jordi Villar](https://github.com/jrdi)).
* Update docker/test/stateful/run.sh [#58168](https://github.com/ClickHouse/ClickHouse/pull/58168) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Update 00165_jit_aggregate_functions.sql [#58169](https://github.com/ClickHouse/ClickHouse/pull/58169) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Update clickhouse-test [#58170](https://github.com/ClickHouse/ClickHouse/pull/58170) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Profile event 'ParallelReplicasUsedCount' [#58173](https://github.com/ClickHouse/ClickHouse/pull/58173) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix flaky test `02719_aggregate_with_empty_string_key` [#58176](https://github.com/ClickHouse/ClickHouse/pull/58176) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix [#58171](https://github.com/ClickHouse/ClickHouse/issues/58171) [#58177](https://github.com/ClickHouse/ClickHouse/pull/58177) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add base backup name to system.backups and system.backup_log tables [#58178](https://github.com/ClickHouse/ClickHouse/pull/58178) ([Pradeep Chhetri](https://github.com/chhetripradeep)).
* Fix use-after-move [#58182](https://github.com/ClickHouse/ClickHouse/pull/58182) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Looking at strange code [#58196](https://github.com/ClickHouse/ClickHouse/pull/58196) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix all Exception with missing arguments [#58198](https://github.com/ClickHouse/ClickHouse/pull/58198) ([Azat Khuzhin](https://github.com/azat)).
* Fix leftover processes/hangs in tests [#58200](https://github.com/ClickHouse/ClickHouse/pull/58200) ([Azat Khuzhin](https://github.com/azat)).
* Fix DWARFBlockInputFormat failing on DWARF 5 unit address ranges [#58204](https://github.com/ClickHouse/ClickHouse/pull/58204) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix error in archive reader [#58206](https://github.com/ClickHouse/ClickHouse/pull/58206) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix DWARFBlockInputFormat using wrong base address sometimes [#58208](https://github.com/ClickHouse/ClickHouse/pull/58208) ([Michael Kolupaev](https://github.com/al13n321)).
* Add support for specifying query parameters in the command line in clickhouse-local [#58210](https://github.com/ClickHouse/ClickHouse/pull/58210) ([Pradeep Chhetri](https://github.com/chhetripradeep)).
* Fix leftover processes/hangs in tests (resubmit) [#58213](https://github.com/ClickHouse/ClickHouse/pull/58213) ([Azat Khuzhin](https://github.com/azat)).
* Add optimization for AND notEquals chain in logical expression optimizer [#58214](https://github.com/ClickHouse/ClickHouse/pull/58214) ([Kevin Mingtarja](https://github.com/kevinmingtarja)).
* Fix syntax and doc [#58221](https://github.com/ClickHouse/ClickHouse/pull/58221) ([San](https://github.com/santrancisco)).
* Cleanup some known short messages [#58226](https://github.com/ClickHouse/ClickHouse/pull/58226) ([Raúl Marín](https://github.com/Algunenano)).
* Some code refactoring (was an attempt to improve build time, but failed) [#58237](https://github.com/ClickHouse/ClickHouse/pull/58237) ([Azat Khuzhin](https://github.com/azat)).
* Fix perf test README [#58245](https://github.com/ClickHouse/ClickHouse/pull/58245) ([Raúl Marín](https://github.com/Algunenano)).
* [Analyzer] Add test for [#57086](https://github.com/ClickHouse/ClickHouse/issues/57086) [#58249](https://github.com/ClickHouse/ClickHouse/pull/58249) ([Raúl Marín](https://github.com/Algunenano)).
* Reintroduce compatibility with `is_deleted` on a syntax level [#58251](https://github.com/ClickHouse/ClickHouse/pull/58251) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Avoid throwing ABORTED on normal situations [#58252](https://github.com/ClickHouse/ClickHouse/pull/58252) ([Raúl Marín](https://github.com/Algunenano)).
* Remove mayBenefitFromIndexForIn [#58265](https://github.com/ClickHouse/ClickHouse/pull/58265) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Allow a few retries when committing a part during shutdown [#58269](https://github.com/ClickHouse/ClickHouse/pull/58269) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Revert [#58267](https://github.com/ClickHouse/ClickHouse/issues/58267) [#58274](https://github.com/ClickHouse/ClickHouse/pull/58274) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -1,206 +1,206 @@
---
slug: /en/development/build-cross-s390x
sidebar_position: 69
title: How to Build, Run and Debug ClickHouse on Linux for s390x (zLinux)
sidebar_label: Build on Linux for s390x (zLinux)
---
As of writing (2023/3/10) building for s390x considered to be experimental. Not all features can be enabled, has broken features and is currently under active development.
## Building
As s390x does not support boringssl, it uses OpenSSL and has two related build options.
- By default, the s390x build will dynamically link to OpenSSL libraries. It will build OpenSSL shared objects, so it's not necessary to install OpenSSL beforehand. (This option is recommended in all cases.)
- Another option is to build OpenSSL in-tree. In this case two build flags need to be supplied to cmake
```bash
-DENABLE_OPENSSL_DYNAMIC=0 -DENABLE_OPENSSL=1
```
These instructions assume that the host machine is x86_64 and has all the tooling required to build natively based on the [build instructions](../development/build.md). It also assumes that the host is Ubuntu 22.04 but the following instructions should also work on Ubuntu 20.04.
In addition to installing the tooling used to build natively, the following additional packages need to be installed:
```bash
apt-get install binutils-s390x-linux-gnu libc6-dev-s390x-cross gcc-s390x-linux-gnu binfmt-support qemu-user-static
```
If you wish to cross compile rust code install the rust cross compile target for s390x:
```bash
rustup target add s390x-unknown-linux-gnu
```
To build for s390x:
```bash
cmake -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-s390x.cmake ..
ninja
```
## Running
Once built, the binary can be run with, eg.:
```bash
qemu-s390x-static -L /usr/s390x-linux-gnu ./clickhouse
```
## Debugging
Install LLDB:
```bash
apt-get install lldb-15
```
To Debug a s390x executable, run clickhouse using QEMU in debug mode:
```bash
qemu-s390x-static -g 31338 -L /usr/s390x-linux-gnu ./clickhouse
```
In another shell run LLDB and attach, replace `<Clickhouse Parent Directory>` and `<build directory>` with the values corresponding to your environment.
```bash
lldb-15
(lldb) target create ./clickhouse
Current executable set to '/<Clickhouse Parent Directory>/ClickHouse/<build directory>/programs/clickhouse' (s390x).
(lldb) settings set target.source-map <build directory> /<Clickhouse Parent Directory>/ClickHouse
(lldb) gdb-remote 31338
Process 1 stopped
* thread #1, stop reason = signal SIGTRAP
frame #0: 0x0000004020e74cd0
-> 0x4020e74cd0: lgr %r2, %r15
0x4020e74cd4: aghi %r15, -160
0x4020e74cd8: xc 0(8,%r15), 0(%r15)
0x4020e74cde: brasl %r14, 275429939040
(lldb) b main
Breakpoint 1: 9 locations.
(lldb) c
Process 1 resuming
Process 1 stopped
* thread #1, stop reason = breakpoint 1.1
frame #0: 0x0000004005cd9fc0 clickhouse`main(argc_=1, argv_=0x0000004020e594a8) at main.cpp:450:17
447 #if !defined(FUZZING_MODE)
448 int main(int argc_, char ** argv_)
449 {
-> 450 inside_main = true;
451 SCOPE_EXIT({ inside_main = false; });
452
453 /// PHDR cache is required for query profiler to work reliably
```
## Visual Studio Code integration
- [CodeLLDB](https://github.com/vadimcn/vscode-lldb) extension is required for visual debugging.
- [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [CMake Variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md).
- Make sure to set the backend to your LLVM installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"`
- Make sure to run the clickhouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this)
### Example configurations
#### cmake-variants.yaml
```yaml
buildType:
default: relwithdebinfo
choices:
debug:
short: Debug
long: Emit debug information
buildType: Debug
release:
short: Release
long: Optimize generated code
buildType: Release
relwithdebinfo:
short: RelWithDebInfo
long: Release with Debug Info
buildType: RelWithDebInfo
tsan:
short: MinSizeRel
long: Minimum Size Release
buildType: MinSizeRel
toolchain:
default: default
description: Select toolchain
choices:
default:
short: x86_64
long: x86_64
s390x:
short: s390x
long: s390x
settings:
CMAKE_TOOLCHAIN_FILE: cmake/linux/toolchain-s390x.cmake
```
#### launch.json
```json
{
"version": "0.2.0",
"configurations": [
{
"type": "lldb",
"request": "custom",
"name": "(lldb) Launch s390x with qemu",
"targetCreateCommands": ["target create ${command:cmake.launchTargetPath}"],
"processCreateCommands": ["gdb-remote 2159"],
"preLaunchTask": "Run ClickHouse"
}
]
}
```
#### settings.json
This would also put different builds under different subfolders of the `build` folder.
```json
{
"cmake.buildDirectory": "${workspaceFolder}/build/${buildKitVendor}-${buildKitVersion}-${variant:toolchain}-${variant:buildType}",
"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"
}
```
#### run-debug.sh
```sh
#! /bin/sh
echo 'Starting debugger session'
cd $1
qemu-s390x-static -g 2159 -L /usr/s390x-linux-gnu $2 $3 $4
```
#### tasks.json
Defines a task to run the compiled executable in `server` mode under a `tmp` folder next to the binaries, with configuration from under `programs/server/config.xml`.
```json
{
"version": "2.0.0",
"tasks": [
{
"label": "Run ClickHouse",
"type": "shell",
"isBackground": true,
"command": "${workspaceFolder}/.vscode/run-debug.sh",
"args": [
"${command:cmake.launchTargetDirectory}/tmp",
"${command:cmake.launchTargetPath}",
"server",
"--config-file=${workspaceFolder}/programs/server/config.xml"
],
"problemMatcher": [
{
"pattern": [
{
"regexp": ".",
"file": 1,
"location": 2,
"message": 3
}
],
"background": {
"activeOnStart": true,
"beginsPattern": "^Starting debugger session",
"endsPattern": ".*"
}
}
]
}
]
}
```
---
slug: /en/development/build-cross-s390x
sidebar_position: 69
title: How to Build, Run and Debug ClickHouse on Linux for s390x (zLinux)
sidebar_label: Build on Linux for s390x (zLinux)
---
As of writing (2023/3/10) building for s390x considered to be experimental. Not all features can be enabled, has broken features and is currently under active development.
## Building
As s390x does not support boringssl, it uses OpenSSL and has two related build options.
- By default, the s390x build will dynamically link to OpenSSL libraries. It will build OpenSSL shared objects, so it's not necessary to install OpenSSL beforehand. (This option is recommended in all cases.)
- Another option is to build OpenSSL in-tree. In this case two build flags need to be supplied to cmake
```bash
-DENABLE_OPENSSL_DYNAMIC=0 -DENABLE_OPENSSL=1
```
These instructions assume that the host machine is x86_64 and has all the tooling required to build natively based on the [build instructions](../development/build.md). It also assumes that the host is Ubuntu 22.04 but the following instructions should also work on Ubuntu 20.04.
In addition to installing the tooling used to build natively, the following additional packages need to be installed:
```bash
apt-get install binutils-s390x-linux-gnu libc6-dev-s390x-cross gcc-s390x-linux-gnu binfmt-support qemu-user-static
```
If you wish to cross compile rust code install the rust cross compile target for s390x:
```bash
rustup target add s390x-unknown-linux-gnu
```
To build for s390x:
```bash
cmake -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-s390x.cmake ..
ninja
```
## Running
Once built, the binary can be run with, eg.:
```bash
qemu-s390x-static -L /usr/s390x-linux-gnu ./clickhouse
```
## Debugging
Install LLDB:
```bash
apt-get install lldb-15
```
To Debug a s390x executable, run clickhouse using QEMU in debug mode:
```bash
qemu-s390x-static -g 31338 -L /usr/s390x-linux-gnu ./clickhouse
```
In another shell run LLDB and attach, replace `<Clickhouse Parent Directory>` and `<build directory>` with the values corresponding to your environment.
```bash
lldb-15
(lldb) target create ./clickhouse
Current executable set to '/<Clickhouse Parent Directory>/ClickHouse/<build directory>/programs/clickhouse' (s390x).
(lldb) settings set target.source-map <build directory> /<Clickhouse Parent Directory>/ClickHouse
(lldb) gdb-remote 31338
Process 1 stopped
* thread #1, stop reason = signal SIGTRAP
frame #0: 0x0000004020e74cd0
-> 0x4020e74cd0: lgr %r2, %r15
0x4020e74cd4: aghi %r15, -160
0x4020e74cd8: xc 0(8,%r15), 0(%r15)
0x4020e74cde: brasl %r14, 275429939040
(lldb) b main
Breakpoint 1: 9 locations.
(lldb) c
Process 1 resuming
Process 1 stopped
* thread #1, stop reason = breakpoint 1.1
frame #0: 0x0000004005cd9fc0 clickhouse`main(argc_=1, argv_=0x0000004020e594a8) at main.cpp:450:17
447 #if !defined(FUZZING_MODE)
448 int main(int argc_, char ** argv_)
449 {
-> 450 inside_main = true;
451 SCOPE_EXIT({ inside_main = false; });
452
453 /// PHDR cache is required for query profiler to work reliably
```
## Visual Studio Code integration
- [CodeLLDB](https://github.com/vadimcn/vscode-lldb) extension is required for visual debugging.
- [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [CMake Variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md).
- Make sure to set the backend to your LLVM installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"`
- Make sure to run the clickhouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this)
### Example configurations
#### cmake-variants.yaml
```yaml
buildType:
default: relwithdebinfo
choices:
debug:
short: Debug
long: Emit debug information
buildType: Debug
release:
short: Release
long: Optimize generated code
buildType: Release
relwithdebinfo:
short: RelWithDebInfo
long: Release with Debug Info
buildType: RelWithDebInfo
tsan:
short: MinSizeRel
long: Minimum Size Release
buildType: MinSizeRel
toolchain:
default: default
description: Select toolchain
choices:
default:
short: x86_64
long: x86_64
s390x:
short: s390x
long: s390x
settings:
CMAKE_TOOLCHAIN_FILE: cmake/linux/toolchain-s390x.cmake
```
#### launch.json
```json
{
"version": "0.2.0",
"configurations": [
{
"type": "lldb",
"request": "custom",
"name": "(lldb) Launch s390x with qemu",
"targetCreateCommands": ["target create ${command:cmake.launchTargetPath}"],
"processCreateCommands": ["gdb-remote 2159"],
"preLaunchTask": "Run ClickHouse"
}
]
}
```
#### settings.json
This would also put different builds under different subfolders of the `build` folder.
```json
{
"cmake.buildDirectory": "${workspaceFolder}/build/${buildKitVendor}-${buildKitVersion}-${variant:toolchain}-${variant:buildType}",
"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"
}
```
#### run-debug.sh
```sh
#! /bin/sh
echo 'Starting debugger session'
cd $1
qemu-s390x-static -g 2159 -L /usr/s390x-linux-gnu $2 $3 $4
```
#### tasks.json
Defines a task to run the compiled executable in `server` mode under a `tmp` folder next to the binaries, with configuration from under `programs/server/config.xml`.
```json
{
"version": "2.0.0",
"tasks": [
{
"label": "Run ClickHouse",
"type": "shell",
"isBackground": true,
"command": "${workspaceFolder}/.vscode/run-debug.sh",
"args": [
"${command:cmake.launchTargetDirectory}/tmp",
"${command:cmake.launchTargetPath}",
"server",
"--config-file=${workspaceFolder}/programs/server/config.xml"
],
"problemMatcher": [
{
"pattern": [
{
"regexp": ".",
"file": 1,
"location": 2,
"message": 3
}
],
"background": {
"activeOnStart": true,
"beginsPattern": "^Starting debugger session",
"endsPattern": ".*"
}
}
]
}
]
}
```

View File

@ -25,6 +25,7 @@ ClickHouse server provides embedded visual interfaces for power users:
- Play UI: open `/play` in the browser;
- Advanced Dashboard: open `/dashboard` in the browser;
- Binary symbols viewer for ClickHouse engineers: open `/binary` in the browser;
There are also a wide range of third-party libraries for working with ClickHouse:

View File

@ -29,8 +29,8 @@ Transactionally inconsistent caching is traditionally provided by client tools o
the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side.
This reduces maintenance effort and avoids redundancy.
:::security consideration
The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed. This means that if there are any alterations to the user's role or permissions between the time the query is cached and when the cache is accessed, the result will not reflect these changes. We recommend using different users to distinguish between different levels of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results.
:::note
Security consideration: The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed. This means that if there are any alterations to the user's role or permissions between the time the query is cached and when the cache is accessed, the result will not reflect these changes. We recommend using different users to distinguish between different levels of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results.
:::
## Configuration Settings and Usage

View File

@ -239,6 +239,10 @@ The amount of virtual memory mapped for the pages of machine code of the server
The amount of virtual memory mapped for the use of stack and for the allocated memory, in bytes. It is unspecified whether it includes the per-thread stacks and most of the allocated memory, that is allocated with the 'mmap' system call. This metric exists only for completeness reasons. I recommend to use the `MemoryResident` metric for monitoring.
### MemoryResidentMax
Maximum amount of physical memory used by the server process, in bytes.
### MemoryResident
The amount of physical memory used by the server process, in bytes.
@ -547,6 +551,14 @@ Total amount of bytes (compressed, including data and indices) stored in all tab
Total amount of data parts in all tables of MergeTree family. Numbers larger than 10 000 will negatively affect the server startup time and it may indicate unreasonable choice of the partition key.
### TotalPrimaryKeyBytesInMemory
The total amount of memory (in bytes) used by primary key values (only takes active parts into account).
### TotalPrimaryKeyBytesInMemoryAllocated
The total amount of memory (in bytes) reserved for primary key values (only takes active parts into account).
### TotalRowsOfMergeTreeTables
Total amount of rows (records) stored in all tables of MergeTree family.

View File

@ -0,0 +1,43 @@
---
slug: /en/operations/system-tables/view_refreshes
---
# view_refreshes
Information about [Refreshable Materialized Views](../../sql-reference/statements/create/view.md#refreshable-materialized-view). Contains all refreshable materialized views, regardless of whether there's a refresh in progress or not.
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in.
- `view` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `status` ([String](../../sql-reference/data-types/string.md)) — Current state of the refresh.
- `last_refresh_result` ([String](../../sql-reference/data-types/string.md)) — Outcome of the latest refresh attempt.
- `last_refresh_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the last refresh attempt. `NULL` if no refresh attempts happened since server startup or table creation.
- `last_success_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the last successful refresh. `NULL` if no successful refreshes happened since server startup or table creation.
- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — How long the last refresh attempt took.
- `next_refresh_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time at which the next refresh is scheduled to start.
- `remaining_dependencies` ([Array(String)](../../sql-reference/data-types/array.md)) — If the view has [refresh dependencies](../../sql-reference/statements/create/view.md#refresh-dependencies), this array contains the subset of those dependencies that are not satisfied for the current refresh yet. If `status = 'WaitingForDependencies'`, a refresh is ready to start as soon as these dependencies are fulfilled.
- `exception` ([String](../../sql-reference/data-types/string.md)) — if `last_refresh_result = 'Exception'`, i.e. the last refresh attempt failed, this column contains the corresponding error message and stack trace.
- `refresh_count` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of successful refreshes since last server restart or table creation.
- `progress` ([Float64](../../sql-reference/data-types/float.md)) — Progress of the current refresh, between 0 and 1.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of rows read by the current refresh so far.
- `total_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Estimated total number of rows that need to be read by the current refresh.
(There are additional columns related to current refresh progress, but they are currently unreliable.)
**Example**
```sql
SELECT
database,
view,
status,
last_refresh_result,
last_refresh_time,
next_refresh_time
FROM system.view_refreshes
┌─database─┬─view───────────────────────┬─status────┬─last_refresh_result─┬───last_refresh_time─┬───next_refresh_time─┐
│ default │ hello_documentation_reader │ Scheduled │ Finished │ 2023-12-01 01:24:00 │ 2023-12-01 01:25:00 │
└──────────┴────────────────────────────┴───────────┴─────────────────────┴─────────────────────┴─────────────────────┘
```

View File

@ -1,62 +1,64 @@
---
slug: /en/sql-reference/aggregate-functions/reference/sparkbar
sidebar_position: 311
sidebar_label: sparkbar
---
# sparkbar
The function plots a frequency histogram for values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`.
Repetitions for all `x` falling into the same bucket are averaged, so data should be pre-aggregated.
Negative repetitions are ignored.
If no interval is specified, then the minimum `x` is used as the interval start, and the maximum `x` — as the interval end.
Otherwise, values outside the interval are ignored.
**Syntax**
``` sql
sparkbar(buckets[, min_x, max_x])(x, y)
```
**Parameters**
- `buckets` — The number of segments. Type: [Integer](../../../sql-reference/data-types/int-uint.md).
- `min_x` — The interval start. Optional parameter.
- `max_x` — The interval end. Optional parameter.
**Arguments**
- `x` — The field with values.
- `y` — The field with the frequency of values.
**Returned value**
- The frequency histogram.
**Example**
Query:
``` sql
CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date;
INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11');
SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
```
Result:
``` text
┌─sparkbar(9)(event_date, cnt)─┐
│ ▂▅▂▃▆█ ▂ │
└──────────────────────────────┘
┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐
│ ▂▅▂▃▇▆█ │
└──────────────────────────────────────────────────────────────────────────┘
```
---
slug: /en/sql-reference/aggregate-functions/reference/sparkbar
sidebar_position: 311
sidebar_label: sparkbar
---
# sparkbar
The function plots a frequency histogram for values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`.
Repetitions for all `x` falling into the same bucket are averaged, so data should be pre-aggregated.
Negative repetitions are ignored.
If no interval is specified, then the minimum `x` is used as the interval start, and the maximum `x` — as the interval end.
Otherwise, values outside the interval are ignored.
**Syntax**
``` sql
sparkbar(buckets[, min_x, max_x])(x, y)
```
**Parameters**
- `buckets` — The number of segments. Type: [Integer](../../../sql-reference/data-types/int-uint.md).
- `min_x` — The interval start. Optional parameter.
- `max_x` — The interval end. Optional parameter.
**Arguments**
- `x` — The field with values.
- `y` — The field with the frequency of values.
**Returned value**
- The frequency histogram.
**Example**
Query:
``` sql
CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date;
INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11');
SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
```
Result:
``` text
┌─sparkbar(9)(event_date, cnt)─┐
│ ▂▅▂▃▆█ ▂ │
└──────────────────────────────┘
┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐
│ ▂▅▂▃▇▆█ │
└──────────────────────────────────────────────────────────────────────────┘
```
The alias for this function is sparkBar.

View File

@ -1383,71 +1383,6 @@ Result:
└──────────────────┘
```
## punycodeEncode
Returns the [Punycode](https://en.wikipedia.org/wiki/Punycode) of a string.
The string must be UTF8-encoded, otherwise results are undefined.
**Syntax**
``` sql
punycodeEncode(val)
```
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
**Returned value**
- A Punycode representation of the input value. [String](../data-types/string.md)
**Example**
``` sql
select punycodeEncode('München');
```
Result:
```result
┌─punycodeEncode('München')─┐
│ Mnchen-3ya │
└───────────────────────────┘
```
## punycodeDecode
Returns the UTF8-encoded plaintext of a [Punycode](https://en.wikipedia.org/wiki/Punycode)-encoded string.
**Syntax**
``` sql
punycodeEncode(val)
```
**Arguments**
- `val` - Punycode-encoded string. [String](../data-types/string.md)
**Returned value**
- The plaintext of the input value. [String](../data-types/string.md)
**Example**
``` sql
select punycodeDecode('Mnchen-3ya');
```
Result:
```result
┌─punycodeEncode('Mnchen-3ya')─┐
│ München │
└──────────────────────────────┘
```
## byteHammingDistance
Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.

View File

@ -6,28 +6,28 @@ sidebar_label: VIEW
# ALTER TABLE … MODIFY QUERY Statement
You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process.
You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process.
The `allow_experimental_alter_materialized_view_structure` setting must be enabled.
The `allow_experimental_alter_materialized_view_structure` setting must be enabled.
This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underling storage table and it does not change the columns' definition of the materialized view, because of this the application of this command is very limited for materialized views are created without `TO [db.]name` clause.
**Example with TO table**
```sql
CREATE TABLE events (ts DateTime, event_type String)
CREATE TABLE events (ts DateTime, event_type String)
ENGINE = MergeTree ORDER BY (event_type, ts);
CREATE TABLE events_by_day (ts DateTime, event_type String, events_cnt UInt64)
CREATE TABLE events_by_day (ts DateTime, event_type String, events_cnt UInt64)
ENGINE = SummingMergeTree ORDER BY (event_type, ts);
CREATE MATERIALIZED VIEW mv TO events_by_day AS
CREATE MATERIALIZED VIEW mv TO events_by_day AS
SELECT toStartOfDay(ts) ts, event_type, count() events_cnt
FROM events
GROUP BY ts, event_type;
GROUP BY ts, event_type;
INSERT INTO events
SELECT Date '2020-01-01' + interval number * 900 second,
INSERT INTO events
SELECT Date '2020-01-01' + interval number * 900 second,
['imp', 'click'][number%2+1]
FROM numbers(100);
@ -43,23 +43,23 @@ ORDER BY ts, event_type;
│ 2020-01-02 00:00:00 │ imp │ 2 │
└─────────────────────┴────────────┴─────────────────┘
-- Let's add the new measurment `cost`
-- Let's add the new measurment `cost`
-- and the new dimension `browser`.
ALTER TABLE events
ALTER TABLE events
ADD COLUMN browser String,
ADD COLUMN cost Float64;
-- Column do not have to match in a materialized view and TO
-- (destination table), so the next alter does not break insertion.
ALTER TABLE events_by_day
ALTER TABLE events_by_day
ADD COLUMN cost Float64,
ADD COLUMN browser String after event_type,
MODIFY ORDER BY (event_type, ts, browser);
INSERT INTO events
SELECT Date '2020-01-02' + interval number * 900 second,
INSERT INTO events
SELECT Date '2020-01-02' + interval number * 900 second,
['imp', 'click'][number%2+1],
['firefox', 'safary', 'chrome'][number%3+1],
10/(number+1)%33
@ -82,16 +82,16 @@ ORDER BY ts, event_type;
└─────────────────────┴────────────┴─────────┴────────────┴──────┘
SET allow_experimental_alter_materialized_view_structure=1;
ALTER TABLE mv MODIFY QUERY
ALTER TABLE mv MODIFY QUERY
SELECT toStartOfDay(ts) ts, event_type, browser,
count() events_cnt,
sum(cost) cost
FROM events
GROUP BY ts, event_type, browser;
INSERT INTO events
SELECT Date '2020-01-03' + interval number * 900 second,
INSERT INTO events
SELECT Date '2020-01-03' + interval number * 900 second,
['imp', 'click'][number%2+1],
['firefox', 'safary', 'chrome'][number%3+1],
10/(number+1)%33
@ -138,7 +138,7 @@ PRIMARY KEY (event_type, ts)
ORDER BY (event_type, ts, browser)
SETTINGS index_granularity = 8192
-- !!! The columns' definition is unchanged but it does not matter, we are not quering
-- !!! The columns' definition is unchanged but it does not matter, we are not quering
-- MATERIALIZED VIEW, we are quering TO (storage) table.
-- SELECT section is updated.
@ -169,7 +169,7 @@ The application is very limited because you can only change the `SELECT` section
```sql
CREATE TABLE src_table (`a` UInt32) ENGINE = MergeTree ORDER BY a;
CREATE MATERIALIZED VIEW mv (`a` UInt32) ENGINE = MergeTree ORDER BY a AS SELECT a FROM src_table;
CREATE MATERIALIZED VIEW mv (`a` UInt32) ENGINE = MergeTree ORDER BY a AS SELECT a FROM src_table;
INSERT INTO src_table (a) VALUES (1), (2);
SELECT * FROM mv;
```
@ -199,3 +199,7 @@ SELECT * FROM mv;
## ALTER LIVE VIEW Statement
`ALTER LIVE VIEW ... REFRESH` statement refreshes a [Live view](../create/view.md#live-view). See [Force Live View Refresh](../create/view.md#live-view-alter-refresh).
## ALTER TABLE … MODIFY REFRESH Statement
`ALTER TABLE ... MODIFY REFRESH` statement changes refresh parameters of a [Refreshable Materialized View](../create/view.md#refreshable-materialized-view). See [Changing Refresh Parameters](../create/view.md#changing-refresh-parameters).

View File

@ -37,6 +37,7 @@ SELECT a, b, c FROM (SELECT ...)
```
## Parameterized View
Parametrized views are similar to normal views, but can be created with parameters which are not resolved immediately. These views can be used with table functions, which specify the name of the view as function name and the parameter values as its arguments.
``` sql
@ -66,7 +67,7 @@ When creating a materialized view with `TO [db].[table]`, you can't also use `PO
A materialized view is implemented as follows: when inserting data to the table specified in `SELECT`, part of the inserted data is converted by this `SELECT` query, and the result is inserted in the view.
:::note
:::note
Materialized views in ClickHouse use **column names** instead of column order during insertion into destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using Materialized views.
Materialized views in ClickHouse are implemented more like insert triggers. If theres some aggregation in the view query, its applied only to the batch of freshly inserted data. Any changes to existing data of source table (like update, delete, drop partition, etc.) does not change the materialized view.
@ -96,9 +97,116 @@ This feature is deprecated and will be removed in the future.
For your convenience, the old documentation is located [here](https://pastila.nl/?00f32652/fdf07272a7b54bda7e13b919264e449f.md)
## Refreshable Materialized View {#refreshable-materialized-view}
```sql
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name
REFRESH EVERY|AFTER interval [OFFSET interval]
RANDOMIZE FOR interval
DEPENDS ON [db.]name [, [db.]name [, ...]]
[TO[db.]name] [(columns)] [ENGINE = engine] [EMPTY]
AS SELECT ...
```
where `interval` is a sequence of simple intervals:
```sql
number SECOND|MINUTE|HOUR|DAY|WEEK|MONTH|YEAR
```
Periodically runs the corresponding query and stores its result in a table, atomically replacing the table's previous contents.
Differences from regular non-refreshable materialized views:
* No insert trigger. I.e. when new data is inserted into the table specified in SELECT, it's *not* automatically pushed to the refreshable materialized view. The periodic refresh runs the entire query and replaces the entire table.
* No restrictions on the SELECT query. Table functions (e.g. `url()`), views, UNION, JOIN, are all allowed.
:::note
Refreshable materialized views are a work in progress. Setting `allow_experimental_refreshable_materialized_view = 1` is required for creating one. Current limitations:
* not compatible with Replicated database or table engines,
* require [Atomic database engine](../../../engines/database-engines/atomic.md),
* no retries for failed refresh - we just skip to the next scheduled refresh time,
* no limit on number of concurrent refreshes.
:::
### Refresh Schedule
Example refresh schedules:
```sql
REFRESH EVERY 1 DAY -- every day, at midnight (UTC)
REFRESH EVERY 1 MONTH -- on 1st day of every month, at midnight
REFRESH EVERY 1 MONTH OFFSET 5 DAY 2 HOUR -- on 6th day of every month, at 2:00 am
REFRESH EVERY 2 WEEK OFFSET 5 DAY 15 HOUR 10 MINUTE -- every other Saturday, at 3:10 pm
REFRESH EVERY 30 MINUTE -- at 00:00, 00:30, 01:00, 01:30, etc
REFRESH AFTER 30 MINUTE -- 30 minutes after the previous refresh completes, no alignment with time of day
-- REFRESH AFTER 1 HOUR OFFSET 1 MINUTE -- syntax errror, OFFSET is not allowed with AFTER
```
`RANDOMIZE FOR` randomly adjusts the time of each refresh, e.g.:
```sql
REFRESH EVERY 1 DAY OFFSET 2 HOUR RANDOMIZE FOR 1 HOUR -- every day at random time between 01:30 and 02:30
```
At most one refresh may be running at a time, for a given view. E.g. if a view with `REFRESH EVERY 1 MINUTE` takes 2 minutes to refresh, it'll just be refreshing every 2 minutes. If it then becomes faster and starts refreshing in 10 seconds, it'll go back to refreshing every minute. (In particular, it won't refresh every 10 seconds to catch up with a backlog of missed refreshes - there's no such backlog.)
Additionally, a refresh is started immediately after the materialized view is created, unless `EMPTY` is specified in the `CREATE` query. If `EMPTY` is specified, the first refresh happens according to schedule.
### Dependencies {#refresh-dependencies}
`DEPENDS ON` synchronizes refreshes of different tables. By way of example, suppose there's a chain of two refreshable materialized views:
```sql
CREATE MATERIALIZED VIEW source REFRESH EVERY 1 DAY AS SELECT * FROM url(...)
CREATE MATERIALIZED VIEW destination REFRESH EVERY 1 DAY AS SELECT ... FROM source
```
Without `DEPENDS ON`, both views will start a refresh at midnight, and `destination` typically will see yesterday's data in `source`. If we add dependency:
```
CREATE MATERIALIZED VIEW destination REFRESH EVERY 1 DAY DEPENDS ON source AS SELECT ... FROM source
```
then `destination`'s refresh will start only after `source`'s refresh finished for that day, so `destination` will be based on fresh data.
Alternatively, the same result can be achieved with:
```
CREATE MATERIALIZED VIEW destination REFRESH AFTER 1 HOUR DEPENDS ON source AS SELECT ... FROM source
```
where `1 HOUR` can be any duration less than `source`'s refresh period. The dependent table won't be refreshed more frequently than any of its dependencies. This is a valid way to set up a chain of refreshable views without specifying the real refresh period more than once.
A few more examples:
* `REFRESH EVERY 1 DAY OFFSET 10 MINUTE` (`destination`) depends on `REFRESH EVERY 1 DAY` (`source`)<br/>
If `source` refresh takes more than 10 minutes, `destination` will wait for it.
* `REFRESH EVERY 1 DAY OFFSET 1 HOUR` depends on `REFRESH EVERY 1 DAY OFFSET 23 HOUR`<br/>
Similar to the above, even though the corresponding refreshes happen on different calendar days.
`destination`'s refresh on day X+1 will wait for `source`'s refresh on day X (if it takes more than 2 hours).
* `REFRESH EVERY 2 HOUR` depends on `REFRESH EVERY 1 HOUR`<br/>
The 2 HOUR refresh happens after the 1 HOUR refresh for every other hour, e.g. after the midnight
refresh, then after the 2am refresh, etc.
* `REFRESH EVERY 1 MINUTE` depends on `REFRESH EVERY 2 HOUR`<br/>
`REFRESH AFTER 1 MINUTE` depends on `REFRESH EVERY 2 HOUR`<br/>
`REFRESH AFTER 1 MINUTE` depends on `REFRESH AFTER 2 HOUR`<br/>
`destination` is refreshed once after every `source` refresh, i.e. every 2 hours. The `1 MINUTE` is effectively ignored.
* `REFRESH AFTER 1 HOUR` depends on `REFRESH AFTER 1 HOUR`<br/>
Currently this is not recommended.
:::note
`DEPENDS ON` only works between refreshable materialized views. Listing a regular table in the `DEPENDS ON` list will prevent the view from ever refreshing (dependencies can be removed with `ALTER`, see below).
:::
### Changing Refresh Parameters {#changing-refresh-parameters}
To change refresh parameters:
```
ALTER TABLE [db.]name MODIFY REFRESH EVERY|AFTER ... [RANDOMIZE FOR ...] [DEPENDS ON ...]
```
:::note
This replaces refresh schedule *and* dependencies. If the table had a `DEPENDS ON`, doing a `MODIFY REFRESH` without `DEPENDS ON` will remove the dependencies.
:::
### Other operations
The status of all refreshable materialized views is available in table [`system.view_refreshes`](../../../operations/system-tables/view_refreshes.md). In particular, it contains refresh progress (if running), last and next refresh time, exception message if a refresh failed.
To manually stop, start, trigger, or cancel refreshes use [`SYSTEM STOP|START|REFRESH|CANCEL VIEW`](../system.md#refreshable-materialized-views).
## Window View [Experimental]
:::info
:::info
This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of window views and `WATCH` query using [allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view) setting. Input the command `set allow_experimental_window_view = 1`.
:::

View File

@ -11,7 +11,7 @@ Inserts data into a table.
**Syntax**
``` sql
INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] [SETTINGS ...] VALUES (v11, v12, v13), (v21, v22, v23), ...
```
You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
@ -126,7 +126,7 @@ To insert a default value instead of `NULL` into a column with not nullable data
**Syntax**
``` sql
INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name
INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] [SETTINGS ...] [FORMAT format_name]
```
Use the syntax above to insert data from a file, or files, stored on the **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause.

View File

@ -34,7 +34,7 @@ Queries that use `FINAL` are executed slightly slower than similar queries that
- Data is merged during query execution.
- Queries with `FINAL` read primary key columns in addition to the columns specified in the query.
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine havent happened yet and deal with it by applying aggregation (for example, to discard duplicates).
`FINAL` requires additional compute and memory resources, as the processing that normally would occur at merge time must occur in memory at the time of the query. However, using FINAL is sometimes necessary in order to produce accurate results, and is less expensive than running `OPTIMIZE` to force a merge. It is also sometimes possible to use different queries that assume the background processes of the `MergeTree` engine havent happened yet and deal with it by applying aggregation (for example, to discard duplicates). If you need to use FINAL in your queries in order to get the required results, then it is okay to do so but be aware of the additional processing required.
`FINAL` can be applied automatically using [FINAL](../../../operations/settings/settings.md#final) setting to all tables in a query using a session or a user profile.

View File

@ -43,6 +43,7 @@ Additional join types available in ClickHouse:
- `LEFT ANTI JOIN` and `RIGHT ANTI JOIN`, a blacklist on “join keys”, without producing a cartesian product.
- `LEFT ANY JOIN`, `RIGHT ANY JOIN` and `INNER ANY JOIN`, partially (for opposite side of `LEFT` and `RIGHT`) or completely (for `INNER` and `FULL`) disables the cartesian product for standard `JOIN` types.
- `ASOF JOIN` and `LEFT ASOF JOIN`, joining sequences with a non-exact match. `ASOF JOIN` usage is described below.
- `PASTE JOIN`, performs a horizontal concatenation of two tables.
:::note
When [join_algorithm](../../../operations/settings/settings.md#join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported).
@ -269,6 +270,33 @@ For example, consider the following tables:
`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
:::
## PASTE JOIN Usage
The result of `PASTE JOIN` is a table that contains all columns from left subquery followed by all columns from the right subquery.
The rows are matched based on their positions in the original tables (the order of rows should be defined).
If the subqueries return a different number of rows, extra rows will be cut.
Example:
```SQL
SELECT *
FROM
(
SELECT number AS a
FROM numbers(2)
) AS t1
PASTE JOIN
(
SELECT number AS a
FROM numbers(2)
ORDER BY a DESC
) AS t2
┌─a─┬─t2.a─┐
│ 0 │ 1 │
│ 1 │ 0 │
└───┴──────┘
```
## Distributed JOIN
There are two ways to execute join involving distributed tables:

View File

@ -449,7 +449,7 @@ SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name]
```
### SYSTEM STOP LISTEN
## SYSTEM STOP LISTEN
Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol.
@ -464,7 +464,7 @@ SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QU
- If `QUERIES DEFAULT [EXCEPT .. [,..]]` modifier is specified, all default protocols are stopped, unless specified with `EXCEPT` clause.
- If `QUERIES CUSTOM [EXCEPT .. [,..]]` modifier is specified, all custom protocols are stopped, unless specified with `EXCEPT` clause.
### SYSTEM START LISTEN
## SYSTEM START LISTEN
Allows new connections to be established on the specified protocols.
@ -473,3 +473,47 @@ However, if the server on the specified port and protocol was not stopped using
```sql
SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP WITH PROXY | TCP SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol']
```
## Managing Refreshable Materialized Views {#refreshable-materialized-views}
Commands to control background tasks performed by [Refreshable Materialized Views](../../sql-reference/statements/create/view.md#refreshable-materialized-view)
Keep an eye on [`system.view_refreshes`](../../operations/system-tables/view_refreshes.md) while using them.
### SYSTEM REFRESH VIEW
Trigger an immediate out-of-schedule refresh of a given view.
```sql
SYSTEM REFRESH VIEW [db.]name
```
### SYSTEM STOP VIEW, SYSTEM STOP VIEWS
Disable periodic refreshing of the given view or all refreshable views. If a refresh is in progress, cancel it too.
```sql
SYSTEM STOP VIEW [db.]name
```
```sql
SYSTEM STOP VIEWS
```
### SYSTEM START VIEW, SYSTEM START VIEWS
Enable periodic refreshing for the given view or all refreshable views. No immediate refresh is triggered.
```sql
SYSTEM START VIEW [db.]name
```
```sql
SYSTEM START VIEWS
```
### SYSTEM CANCEL VIEW
If there's a refresh in progress for the given view, interrupt and cancel it. Otherwise do nothing.
```sql
SYSTEM CANCEL VIEW [db.]name
```

View File

@ -1,115 +1,115 @@
---
---
slug: /ru/operations/utilities/clickhouse-format
sidebar_position: 65
sidebar_label: clickhouse-format
---
# clickhouse-format {#clickhouse-format}
Позволяет форматировать входящие запросы.
Ключи:
- `--help` или`-h` — выводит описание ключей.
- `--query` — форматирует запрос любой длины и сложности.
- `--hilite` — добавляет подсветку синтаксиса с экранированием символов.
- `--oneline` — форматирование в одну строку.
- `--quiet` или `-q` — проверяет синтаксис без вывода результата.
- `--multiquery` or `-n` — поддерживает несколько запросов в одной строке.
- `--obfuscate` — обфусцирует вместо форматирования.
- `--seed <строка>` — задает строку, которая определяет результат обфускации.
- `--backslash` — добавляет обратный слеш в конце каждой строки отформатированного запроса. Удобно использовать если многострочный запрос скопирован из интернета или другого источника и его нужно выполнить из командной строки.
## Примеры {#examples}
1. Форматирование запроса:
```bash
$ clickhouse-format --query "select number from numbers(10) where number%2 order by number desc;"
```
Результат:
```text
SELECT number
FROM numbers(10)
WHERE number % 2
ORDER BY number DESC
```
2. Подсветка синтаксиса и форматирование в одну строку:
```bash
$ clickhouse-format --oneline --hilite <<< "SELECT sum(number) FROM numbers(5);"
```
Результат:
```sql
SELECT sum(number) FROM numbers(5)
```
3. Несколько запросов в одной строке:
```bash
$ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Результат:
```text
SELECT *
FROM
(
SELECT 1 AS x
UNION ALL
SELECT 1
UNION DISTINCT
SELECT 3
)
;
```
4. Обфускация:
```bash
$ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Результат:
```text
SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END;
```
Тот же запрос с другой инициализацией обфускатора:
```bash
$ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Результат:
```text
SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END;
```
5. Добавление обратного слеша:
```bash
$ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Результат:
```text
SELECT * \
FROM \
( \
SELECT 1 AS x \
UNION ALL \
SELECT 1 \
UNION DISTINCT \
SELECT 3 \
)
```
sidebar_position: 65
sidebar_label: clickhouse-format
---
# clickhouse-format {#clickhouse-format}
Позволяет форматировать входящие запросы.
Ключи:
- `--help` или`-h` — выводит описание ключей.
- `--query` — форматирует запрос любой длины и сложности.
- `--hilite` — добавляет подсветку синтаксиса с экранированием символов.
- `--oneline` — форматирование в одну строку.
- `--quiet` или `-q` — проверяет синтаксис без вывода результата.
- `--multiquery` or `-n` — поддерживает несколько запросов в одной строке.
- `--obfuscate` — обфусцирует вместо форматирования.
- `--seed <строка>` — задает строку, которая определяет результат обфускации.
- `--backslash` — добавляет обратный слеш в конце каждой строки отформатированного запроса. Удобно использовать если многострочный запрос скопирован из интернета или другого источника и его нужно выполнить из командной строки.
## Примеры {#examples}
1. Форматирование запроса:
```bash
$ clickhouse-format --query "select number from numbers(10) where number%2 order by number desc;"
```
Результат:
```text
SELECT number
FROM numbers(10)
WHERE number % 2
ORDER BY number DESC
```
2. Подсветка синтаксиса и форматирование в одну строку:
```bash
$ clickhouse-format --oneline --hilite <<< "SELECT sum(number) FROM numbers(5);"
```
Результат:
```sql
SELECT sum(number) FROM numbers(5)
```
3. Несколько запросов в одной строке:
```bash
$ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Результат:
```text
SELECT *
FROM
(
SELECT 1 AS x
UNION ALL
SELECT 1
UNION DISTINCT
SELECT 3
)
;
```
4. Обфускация:
```bash
$ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Результат:
```text
SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END;
```
Тот же запрос с другой инициализацией обфускатора:
```bash
$ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Результат:
```text
SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END;
```
5. Добавление обратного слеша:
```bash
$ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Результат:
```text
SELECT * \
FROM \
( \
SELECT 1 AS x \
UNION ALL \
SELECT 1 \
UNION DISTINCT \
SELECT 3 \
)
```

View File

@ -1,62 +1,62 @@
---
slug: /ru/sql-reference/aggregate-functions/reference/sparkbar
sidebar_position: 311
sidebar_label: sparkbar
---
# sparkbar {#sparkbar}
Функция строит гистограмму частот по заданным значениям `x` и частоте повторения этих значений `y` на интервале `[min_x, max_x]`. Повторения для всех `x`, попавших в один бакет, усредняются, поэтому данные должны быть предварительно агрегированы. Отрицательные повторения игнорируются.
Если интервал для построения не указан, то в качестве нижней границы интервала будет взято минимальное значение `x`, а в качестве верхней границы — максимальное значение `x`.
Значения `x` вне указанного интервала игнорируются.
**Синтаксис**
``` sql
sparkbar(width[, min_x, max_x])(x, y)
```
**Параметры**
- `width` — Количество столбцов гистограммы. Тип: [Integer](../../../sql-reference/data-types/int-uint.md).
- `min_x` — Начало интервала. Необязательный параметр.
- `max_x` — Конец интервала. Необязательный параметр.
**Аргументы**
- `x` — Поле со значениями.
- `y` — Поле с частотой повторения значений.
**Возвращаемые значения**
- Гистограмма частот.
**Пример**
Запрос:
``` sql
CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date;
INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11');
SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
```
Результат:
``` text
┌─sparkbar(9)(event_date, cnt)─┐
│ ▂▅▂▃▆█ ▂ │
└──────────────────────────────┘
┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐
│ ▂▅▂▃▇▆█ │
└──────────────────────────────────────────────────────────────────────────┘
```
---
slug: /ru/sql-reference/aggregate-functions/reference/sparkbar
sidebar_position: 311
sidebar_label: sparkbar
---
# sparkbar {#sparkbar}
Функция строит гистограмму частот по заданным значениям `x` и частоте повторения этих значений `y` на интервале `[min_x, max_x]`. Повторения для всех `x`, попавших в один бакет, усредняются, поэтому данные должны быть предварительно агрегированы. Отрицательные повторения игнорируются.
Если интервал для построения не указан, то в качестве нижней границы интервала будет взято минимальное значение `x`, а в качестве верхней границы — максимальное значение `x`.
Значения `x` вне указанного интервала игнорируются.
**Синтаксис**
``` sql
sparkbar(width[, min_x, max_x])(x, y)
```
**Параметры**
- `width` — Количество столбцов гистограммы. Тип: [Integer](../../../sql-reference/data-types/int-uint.md).
- `min_x` — Начало интервала. Необязательный параметр.
- `max_x` — Конец интервала. Необязательный параметр.
**Аргументы**
- `x` — Поле со значениями.
- `y` — Поле с частотой повторения значений.
**Возвращаемые значения**
- Гистограмма частот.
**Пример**
Запрос:
``` sql
CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date;
INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11');
SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
```
Результат:
``` text
┌─sparkbar(9)(event_date, cnt)─┐
│ ▂▅▂▃▆█ ▂ │
└──────────────────────────────┘
┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐
│ ▂▅▂▃▇▆█ │
└──────────────────────────────────────────────────────────────────────────┘
```

File diff suppressed because it is too large Load Diff

View File

@ -1,18 +1,18 @@
---
---
slug: /zh/faq/general/dbms-naming
title: "\u201CClickHouse\u201D 有什么含义?"
toc_hidden: true
sidebar_position: 10
---
# “ClickHouse” 有什么含义? {#what-does-clickhouse-mean}
它是“**点击**流”和“数据**仓库**”的组合。它来自于Yandex最初的用例。在Metrica网站上ClickHouse本应该保存人们在互联网上的所有点击记录现在它仍然在做这项工作。你可以在[ClickHouse history](../../introduction/history.md)页面上阅读更多关于这个用例的信息。
这个由两部分组成的意思有两个结果:
- 唯一正确的写“Click**H**ouse”的方式是用大写H。
- 如果需要缩写,请使用“**CH**”。由于一些历史原因缩写CK在中国也很流行主要是因为中文中最早的一个关于ClickHouse的演讲使用了这种形式。
!!! info “有趣的事实”
多年后ClickHouse闻名于世, 这种命名方法:结合各有深意的两个词被赞扬为最好的数据库命名方式, 卡内基梅隆大学数据库副教授[Andy Pavlo做的研究](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html) 。ClickHouse与Postgres共同获得“史上最佳数据库名”奖。
title: "\u201CClickHouse\u201D 有什么含义?"
toc_hidden: true
sidebar_position: 10
---
# “ClickHouse” 有什么含义? {#what-does-clickhouse-mean}
它是“**点击**流”和“数据**仓库**”的组合。它来自于Yandex最初的用例。在Metrica网站上ClickHouse本应该保存人们在互联网上的所有点击记录现在它仍然在做这项工作。你可以在[ClickHouse history](../../introduction/history.md)页面上阅读更多关于这个用例的信息。
这个由两部分组成的意思有两个结果:
- 唯一正确的写“Click**H**ouse”的方式是用大写H。
- 如果需要缩写,请使用“**CH**”。由于一些历史原因缩写CK在中国也很流行主要是因为中文中最早的一个关于ClickHouse的演讲使用了这种形式。
!!! info “有趣的事实”
多年后ClickHouse闻名于世, 这种命名方法:结合各有深意的两个词被赞扬为最好的数据库命名方式, 卡内基梅隆大学数据库副教授[Andy Pavlo做的研究](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html) 。ClickHouse与Postgres共同获得“史上最佳数据库名”奖。

View File

@ -1,18 +1,18 @@
---
---
slug: /zh/faq/general/how-do-i-contribute-code-to-clickhouse
title: 我如何为ClickHouse贡献代码?
toc_hidden: true
sidebar_position: 120
---
# 我如何为ClickHouse贡献代码? {#how-do-i-contribute-code-to-clickhouse}
ClickHouse是一个开源项目[在GitHub上开发](https://github.com/ClickHouse/ClickHouse)。
按照惯例,贡献指南发布在源代码库根目录的 [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md)文件中。
如果你想对ClickHouse提出实质性的改变建议可以考虑[在GitHub上发布一个问题](https://github.com/ClickHouse/ClickHouse/issues/new/choose),解释一下你想做什么,先与维护人员和社区讨论一下。[此类RFC问题的例子](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc)。
如果您的贡献与安全相关,也请查看[我们的安全政策](https://github.com/ClickHouse/ClickHouse/security/policy/)。
title: 我如何为ClickHouse贡献代码?
toc_hidden: true
sidebar_position: 120
---
# 我如何为ClickHouse贡献代码? {#how-do-i-contribute-code-to-clickhouse}
ClickHouse是一个开源项目[在GitHub上开发](https://github.com/ClickHouse/ClickHouse)。
按照惯例,贡献指南发布在源代码库根目录的 [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md)文件中。
如果你想对ClickHouse提出实质性的改变建议可以考虑[在GitHub上发布一个问题](https://github.com/ClickHouse/ClickHouse/issues/new/choose),解释一下你想做什么,先与维护人员和社区讨论一下。[此类RFC问题的例子](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc)。
如果您的贡献与安全相关,也请查看[我们的安全政策](https://github.com/ClickHouse/ClickHouse/security/policy/)。

View File

@ -1,22 +1,22 @@
---
slug: /zh/faq/integration/
title: 关于集成ClickHouse和其他系统的问题
toc_hidden_folder: true
sidebar_position: 4
sidebar_label: Integration
---
# 关于集成ClickHouse和其他系统的问题 {#question-about-integrating-clickhouse-and-other-systems}
问题:
- [如何从 ClickHouse 导出数据到一个文件?](../../faq/integration/file-export.md)
- [如何导入JSON到ClickHouse?](../../faq/integration/json-import.md)
- [如果我用ODBC链接Oracle数据库出现编码问题该怎么办?](../../faq/integration/oracle-odbc.md)
!!! info "没看到你要找的东西吗?"
查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。
---
slug: /zh/faq/integration/
title: 关于集成ClickHouse和其他系统的问题
toc_hidden_folder: true
sidebar_position: 4
sidebar_label: Integration
---
# 关于集成ClickHouse和其他系统的问题 {#question-about-integrating-clickhouse-and-other-systems}
问题:
- [如何从 ClickHouse 导出数据到一个文件?](../../faq/integration/file-export.md)
- [如何导入JSON到ClickHouse?](../../faq/integration/json-import.md)
- [如果我用ODBC链接Oracle数据库出现编码问题该怎么办?](../../faq/integration/oracle-odbc.md)
!!! info "没看到你要找的东西吗?"
查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。
{## [原文](https://clickhouse.com/docs/en/faq/integration/) ##}

View File

@ -1,21 +1,21 @@
---
slug: /zh/faq/operations/
title: 关于操作ClickHouse服务器和集群的问题
toc_hidden_folder: true
sidebar_position: 3
sidebar_label: Operations
---
# 关于操作ClickHouse服务器和集群的问题 {#question-about-operating-clickhouse-servers-and-clusters}
问题:
- [如果想在生产环境部署,需要用哪个版本的 ClickHouse 呢?](../../faq/operations/production.md)
- [是否可能从 ClickHouse 数据表中删除所有旧的数据记录?](../../faq/operations/delete-old-data.md)
- [ClickHouse支持多区域复制吗?](../../faq/operations/multi-region-replication.md)
!!! info "没看到你要找的东西吗?"
查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。
{## [原文](https://clickhouse.com/docs/en/faq/production/) ##}
---
slug: /zh/faq/operations/
title: 关于操作ClickHouse服务器和集群的问题
toc_hidden_folder: true
sidebar_position: 3
sidebar_label: Operations
---
# 关于操作ClickHouse服务器和集群的问题 {#question-about-operating-clickhouse-servers-and-clusters}
问题:
- [如果想在生产环境部署,需要用哪个版本的 ClickHouse 呢?](../../faq/operations/production.md)
- [是否可能从 ClickHouse 数据表中删除所有旧的数据记录?](../../faq/operations/delete-old-data.md)
- [ClickHouse支持多区域复制吗?](../../faq/operations/multi-region-replication.md)
!!! info "没看到你要找的东西吗?"
查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。
{## [原文](https://clickhouse.com/docs/en/faq/production/) ##}

View File

@ -1,15 +1,15 @@
---
---
slug: /zh/faq/operations/multi-region-replication
title: ClickHouse支持多区域复制吗?
toc_hidden: true
sidebar_position: 30
---
# ClickHouse支持多区域复制吗? {#does-clickhouse-support-multi-region-replication}
简短的回答是“是的”。然而,我们建议将所有区域/数据中心之间的延迟保持在两位数字范围内,否则,在通过分布式共识协议时,写性能将受到影响。例如,美国海岸之间的复制可能会很好,但美国和欧洲之间就不行。
在配置方面,这与单区域复制没有区别,只是使用位于不同位置的主机作为副本。
更多信息,请参见[关于数据复制的完整文章](../../engines/table-engines/mergetree-family/replication.md)。
title: ClickHouse支持多区域复制吗?
toc_hidden: true
sidebar_position: 30
---
# ClickHouse支持多区域复制吗? {#does-clickhouse-support-multi-region-replication}
简短的回答是“是的”。然而,我们建议将所有区域/数据中心之间的延迟保持在两位数字范围内,否则,在通过分布式共识协议时,写性能将受到影响。例如,美国海岸之间的复制可能会很好,但美国和欧洲之间就不行。
在配置方面,这与单区域复制没有区别,只是使用位于不同位置的主机作为副本。
更多信息,请参见[关于数据复制的完整文章](../../engines/table-engines/mergetree-family/replication.md)。

View File

@ -35,7 +35,6 @@
#include <Common/StudentTTest.h>
#include <Common/CurrentMetrics.h>
#include <Common/ErrorCodes.h>
#include <filesystem>
/** A tool for evaluating ClickHouse performance.

View File

@ -24,6 +24,7 @@
#include <TableFunctions/registerTableFunctions.h>
#include <Storages/StorageFactory.h>
#include <Storages/registerStorages.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <DataTypes/DataTypeFactory.h>
#include <Formats/FormatFactory.h>
#include <Formats/registerFormats.h>
@ -32,6 +33,9 @@
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
extern const char * auto_time_zones[];
namespace DB
{
namespace ErrorCodes
@ -133,9 +137,25 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
auto all_known_storage_names = StorageFactory::instance().getAllRegisteredNames();
auto all_known_data_type_names = DataTypeFactory::instance().getAllRegisteredNames();
auto all_known_settings = Settings().getAllRegisteredNames();
auto all_known_merge_tree_settings = MergeTreeSettings().getAllRegisteredNames();
additional_names.insert(all_known_storage_names.begin(), all_known_storage_names.end());
additional_names.insert(all_known_data_type_names.begin(), all_known_data_type_names.end());
additional_names.insert(all_known_settings.begin(), all_known_settings.end());
additional_names.insert(all_known_merge_tree_settings.begin(), all_known_merge_tree_settings.end());
for (auto * it = auto_time_zones; *it; ++it)
{
String time_zone_name = *it;
/// Example: Europe/Amsterdam
Strings split;
boost::split(split, time_zone_name, [](char c){ return c == '/'; });
for (const auto & word : split)
if (!word.empty())
additional_names.insert(word);
}
KnownIdentifierFunc is_known_identifier = [&](std::string_view name)
{

View File

@ -776,6 +776,7 @@ void LocalServer::processConfig()
global_context->setQueryKindInitial();
global_context->setQueryKind(query_kind);
global_context->setQueryParameters(query_parameters);
}
@ -822,6 +823,7 @@ void LocalServer::printHelpMessage([[maybe_unused]] const OptionsDescription & o
std::cout << getHelpHeader() << "\n";
std::cout << options_description.main_description.value() << "\n";
std::cout << getHelpFooter() << "\n";
std::cout << "In addition, --param_name=value can be specified for substitution of parameters for parametrized queries.\n";
#endif
}
@ -898,7 +900,31 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum
for (int arg_num = 1; arg_num < argc; ++arg_num)
{
std::string_view arg = argv[arg_num];
if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-'))
/// Parameter arg after underline.
if (arg.starts_with("--param_"))
{
auto param_continuation = arg.substr(strlen("--param_"));
auto equal_pos = param_continuation.find_first_of('=');
if (equal_pos == std::string::npos)
{
/// param_name value
++arg_num;
if (arg_num >= argc)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter requires value");
arg = argv[arg_num];
query_parameters.emplace(String(param_continuation), String(arg));
}
else
{
if (equal_pos == 0)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter name cannot be empty");
/// param_name=value
query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
}
}
else if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-'))
{
/// Transform the abbreviated syntax '--multiquery <SQL>' into the full syntax '--multiquery -q <SQL>'
++arg_num;

267
programs/server/binary.html Normal file
View File

@ -0,0 +1,267 @@
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<link rel="icon" href="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI1NCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDkgOCI+PHN0eWxlPi5ve2ZpbGw6I2ZjMH0ucntmaWxsOnJlZH08L3N0eWxlPjxwYXRoIGQ9Ik0wLDcgaDEgdjEgaC0xIHoiIGNsYXNzPSJyIi8+PHBhdGggZD0iTTAsMCBoMSB2NyBoLTEgeiIgY2xhc3M9Im8iLz48cGF0aCBkPSJNMiwwIGgxIHY4IGgtMSB6IiBjbGFzcz0ibyIvPjxwYXRoIGQ9Ik00LDAgaDEgdjggaC0xIHoiIGNsYXNzPSJvIi8+PHBhdGggZD0iTTYsMCBoMSB2OCBoLTEgeiIgY2xhc3M9Im8iLz48cGF0aCBkPSJNOCwzLjI1IGgxIHYxLjUgaC0xIHoiIGNsYXNzPSJvIi8+PC9zdmc+">
<title>ClickHouse Binary Viewer</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/leaflet/1.9.4/leaflet.min.css" />
<style type="text/css">
html, body, #space {
width: 100%;
height: 100%;
padding: 0;
margin: 0;
}
#space {
background: #111;
image-rendering: pixelated;
}
#error {
display: none;
position: absolute;
z-index: 1001;
bottom: max(5%, 1em);
left: 50%;
transform: translate(-50%, 0);
background: #300;
color: white;
font-family: monospace;
white-space: pre-wrap;
font-size: 16pt;
padding: 1em;
min-width: 50%;
max-width: 80%;
}
.leaflet-fade-anim .leaflet-popup {
transition: none;
font-family: monospace;
}
.leaflet-control-attribution {
font-size: 12pt;
}
</style>
</head>
<body>
<div id="space"></div>
<div id="error"></div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/leaflet/1.9.4/leaflet.min.js"></script>
<script>
let host = 'http://localhost:8123/';
let user = 'default';
let password = '';
let add_http_cors_header = true;
/// If it is hosted on server, assume that it is the address of ClickHouse.
if (location.protocol != 'file:') {
host = location.origin;
user = 'default';
add_http_cors_header = false;
}
let map = L.map('space', {
crs: L.CRS.Simple,
center: [-512, 512],
maxBounds: [[128, -128], [-1152, 1152]],
zoom: 0,
});
let cached_tiles = {};
async function render(coords, tile) {
const sql = `
WITH
bitShiftLeft(1::UInt64, 5 - {z:UInt8})::UInt64 AS zoom_factor,
number MOD 1024 AS tile_x,
number DIV 1024 AS tile_y,
(zoom_factor * (tile_x + {x:UInt16} * 1024))::UInt16 AS x,
(zoom_factor * (tile_y + {y:UInt16} * 1024))::UInt16 AS y,
mortonEncode(x, y) AS addr,
extract(demangle(addressToSymbol(addr)), '^[^<]+') AS name,
(empty(name) ? 0 : sipHash64(name)) AS hash,
hash MOD 256 AS r, hash DIV 256 MOD 256 AS g, hash DIV 65536 MOD 256 AS b
SELECT r::UInt8, g::UInt8, b::UInt8
FROM numbers_mt(1024*1024)
ORDER BY number`;
const key = `${coords.z}-${coords.x}-${coords.y}`;
let buf = cached_tiles[key];
if (!buf) {
let url = `${host}?default_format=RowBinary&allow_introspection_functions=1`;
if (add_http_cors_header) {
// For debug purposes, you may set add_http_cors_header from a browser console
url += '&add_http_cors_header=1';
}
if (user) {
url += `&user=${encodeURIComponent(user)}`;
}
if (password) {
url += `&password=${encodeURIComponent(password)}`;
}
url += `&param_z=${coords.z}&param_x=${coords.x}&param_y=${coords.y}`;
url += `&enable_http_compression=1&network_compression_method=zstd&network_zstd_compression_level=6`;
const response = await fetch(url, { method: 'POST', body: sql });
if (!response.ok) {
const text = await response.text();
let err = document.getElementById('error');
err.textContent = text;
err.style.display = 'block';
return;
}
buf = await response.arrayBuffer();
cached_tiles[key] = buf;
}
let ctx = tile.getContext('2d');
let image = ctx.createImageData(1024, 1024);
let arr = new Uint8ClampedArray(buf);
for (let i = 0; i < 1024 * 1024; ++i) {
image.data[i * 4 + 0] = arr[i * 3 + 0];
image.data[i * 4 + 1] = arr[i * 3 + 1];
image.data[i * 4 + 2] = arr[i * 3 + 2];
image.data[i * 4 + 3] = 255;
}
ctx.putImageData(image, 0, 0, 0, 0, 1024, 1024);
let err = document.getElementById('error');
err.style.display = 'none';
}
L.GridLayer.ClickHouse = L.GridLayer.extend({
createTile: function(coords, done) {
let tile = L.DomUtil.create('canvas', 'leaflet-tile');
tile.width = 1024;
tile.height = 1024;
if (coords.x < 0 || coords.y < 0 || coords.x >= Math.pow(2, coords.z) || coords.y >= Math.pow(2, coords.z)) return tile;
render(coords, tile).then(err => done(err, tile));
return tile;
}
});
let layer = new L.GridLayer.ClickHouse({
tileSize: 1024,
minZoom: 0,
maxZoom: 10,
minNativeZoom: 0,
maxNativeZoom: 5,
attribution: '© ClickHouse, Inc.'
});
layer.addTo(map);
map.attributionControl.setPrefix('<a href="https://github.com/ClickHouse/ClickHouse/">About</a>');
function latLngToPixel(latlng) {
return { x: ((latlng.lng / 1024) * 32768)|0, y: ((-latlng.lat / 1024) * 32768)|0 };
}
function pixelToLatLng(pixel) {
return { lat: (-pixel.y - 0.5) / 32768 * 1024, lng: (pixel.x + 0.5) / 32768 * 1024 };
}
let popup = L.popup({maxWidth: '100%'});
let current_requested_addr = '';
function updateHistory() {
const state = {
zoom: map.getZoom(),
center: latLngToPixel(map.getCenter()),
};
let query = `?zoom=${state.zoom}&x=${state.center.x}&y=${state.center.y}`;
if (popup.isOpen() && map.getBounds().contains(popup.getLatLng())) {
state.popup = latLngToPixel(popup.getLatLng());
query += `&px=${state.popup.x}&py=${state.popup.y}`;
}
history.replaceState(state, '', query);
}
window.onpopstate = function(event) {
const state = event.state;
if (!state) {
return;
}
map.setView(pixelToLatLng(state.center), state.zoom);
if (state.popup) {
showPopup(state.popup.x, state.popup.y);
}
};
if (window.location.search) {
const params = new URLSearchParams(window.location.search);
map.setView(pixelToLatLng({x: params.get('x')|0, y: params.get('y')|0}), params.get('zoom'));
if (params.get('px') !== null && params.get('py') !== null) {
showPopup(params.get('px')|0, params.get('py')|0);
}
}
function showPopup(x, y) {
const xn = BigInt(x);
const yn = BigInt(y);
let addr_int = 0n;
for (let bit = 0n; bit < 16n; ++bit) {
addr_int |= ((xn >> bit) & 1n) << (bit * 2n);
addr_int |= ((yn >> bit) & 1n) << (1n + bit * 2n);
}
current_requested_addr = addr_int;
const addr_hex = '0x' + addr_int.toString(16);
const response = fetch(
`http://localhost:8123/?default_format=JSON`,
{
method: 'POST',
body: `SELECT encodeXMLComponent(demangle(addressToSymbol(${addr_int}::UInt64))) AS name,
encodeXMLComponent(addressToLine(${addr_int}::UInt64)) AS line`
}).then(response => response.json().then(o => {
let name = o.rows ? o.data[0].name : 'nothing';
let line = o.rows ? o.data[0].line : '';
if (addr_int == current_requested_addr) {
popup.setContent(`<p><b>${addr_hex}</b></p><p>${name}</p><p>${line}</p>`);
}
}));
popup
.setLatLng(pixelToLatLng({x: x, y: y}))
.setContent(addr_hex)
.openOn(map);
}
map.on('click', e => {
const {x, y} = latLngToPixel(e.latlng);
if (x < 0 || x >= 32768 || y < 0 || y >= 32768) return;
showPopup(x, y);
updateHistory();
});
map.on('moveend', e => updateHistory());
</script>
</body>
</html>

View File

@ -965,12 +965,10 @@ document.getElementById('mass-editor-textarea').addEventListener('input', e => {
function legendAsTooltipPlugin({ className, style = { background: "var(--legend-background)" } } = {}) {
let legendEl;
let showTop = false;
const showLimit = 5;
let multiline;
function init(u, opts) {
legendEl = u.root.querySelector(".u-legend");
legendEl.classList.remove("u-inline");
className && legendEl.classList.add(className);
@ -986,18 +984,19 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend-
...style
});
const nodes = legendEl.querySelectorAll("th");
for (let i = 0; i < nodes.length; i++)
nodes[i]._order = i;
if (opts.series.length == 2) {
const nodes = legendEl.querySelectorAll("th");
multiline = false;
for (let i = 0; i < nodes.length; i++)
nodes[i].style.display = "none";
} else {
multiline = true;
legendEl.querySelector("th").remove();
legendEl.querySelector("td").setAttribute('colspan', '2');
legendEl.querySelector("td").style.textAlign = 'center';
}
if (opts.series.length - 1 > showLimit) {
showTop = true;
let footer = legendEl.insertRow().insertCell();
footer.setAttribute('colspan', '2');
footer.style.textAlign = 'center';
@ -1024,18 +1023,20 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend-
left -= legendEl.clientWidth / 2;
top -= legendEl.clientHeight / 2;
legendEl.style.transform = "translate(" + left + "px, " + top + "px)";
if (showTop) {
if (multiline) {
let nodes = nodeListToArray(legendEl.querySelectorAll("tr"));
let header = nodes.shift();
let footer = nodes.pop();
nodes.forEach(function (node) { node._sort_key = +node.querySelector("td").textContent; });
nodes.sort((a, b) => +b._sort_key - +a._sort_key);
let showLimit = Math.floor(u.height / 30);
nodes.forEach(function (node) { node._sort_key = nodes.length > showLimit ? +node.querySelector("td").textContent.replace(/,/g,'') : node._order; });
nodes.sort((a, b) => b._sort_key - a._sort_key);
nodes.forEach(function (node) { node.parentNode.appendChild(node); });
for (let i = 0; i < nodes.length; i++) {
nodes[i].style.display = i < showLimit ? null : "none";
delete nodes[i]._sort_key;
}
footer.parentNode.appendChild(footer);
footer.style.display = nodes.length > showLimit ? null : "none";
}
}

View File

@ -82,7 +82,8 @@ enum class AccessType
\
M(ALTER_VIEW_REFRESH, "ALTER LIVE VIEW REFRESH, REFRESH VIEW", VIEW, ALTER_VIEW) \
M(ALTER_VIEW_MODIFY_QUERY, "ALTER TABLE MODIFY QUERY", VIEW, ALTER_VIEW) \
M(ALTER_VIEW, "", GROUP, ALTER) /* allows to execute ALTER VIEW REFRESH, ALTER VIEW MODIFY QUERY;
M(ALTER_VIEW_MODIFY_REFRESH, "ALTER TABLE MODIFY QUERY", VIEW, ALTER_VIEW) \
M(ALTER_VIEW, "", GROUP, ALTER) /* allows to execute ALTER VIEW REFRESH, ALTER VIEW MODIFY QUERY, ALTER VIEW MODIFY REFRESH;
implicitly enabled by the grant ALTER_TABLE */\
\
M(ALTER, "", GROUP, ALL) /* allows to execute ALTER {TABLE|LIVE VIEW} */\
@ -177,6 +178,7 @@ enum class AccessType
M(SYSTEM_MOVES, "SYSTEM STOP MOVES, SYSTEM START MOVES, STOP MOVES, START MOVES", TABLE, SYSTEM) \
M(SYSTEM_PULLING_REPLICATION_LOG, "SYSTEM STOP PULLING REPLICATION LOG, SYSTEM START PULLING REPLICATION LOG", TABLE, SYSTEM) \
M(SYSTEM_CLEANUP, "SYSTEM STOP CLEANUP, SYSTEM START CLEANUP", TABLE, SYSTEM) \
M(SYSTEM_VIEWS, "SYSTEM REFRESH VIEW, SYSTEM START VIEWS, SYSTEM STOP VIEWS, SYSTEM START VIEW, SYSTEM STOP VIEW, SYSTEM CANCEL VIEW, REFRESH VIEW, START VIEWS, STOP VIEWS, START VIEW, STOP VIEW, CANCEL VIEW", VIEW, SYSTEM) \
M(SYSTEM_DISTRIBUTED_SENDS, "SYSTEM STOP DISTRIBUTED SENDS, SYSTEM START DISTRIBUTED SENDS, STOP DISTRIBUTED SENDS, START DISTRIBUTED SENDS", TABLE, SYSTEM_SENDS) \
M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP REPLICATED SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \
M(SYSTEM_SENDS, "SYSTEM STOP SENDS, SYSTEM START SENDS, STOP SENDS, START SENDS", GROUP, SYSTEM) \

View File

@ -51,7 +51,7 @@ TEST(AccessRights, Union)
"CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, "
"TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, "
"SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
"SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
"SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
"SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "
"SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1");
}

View File

@ -51,10 +51,10 @@ void AggregateFunctionFactory::registerFunction(const String & name, Value creat
void AggregateFunctionFactory::registerNullsActionTransformation(const String & source_ignores_nulls, const String & target_respect_nulls)
{
if (!aggregate_functions.contains(source_ignores_nulls))
throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Source aggregation '{}' not found");
throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Source aggregation '{}' not found", source_ignores_nulls);
if (!aggregate_functions.contains(target_respect_nulls))
throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Target aggregation '{}' not found");
throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Target aggregation '{}' not found", target_respect_nulls);
if (!respect_nulls.emplace(source_ignores_nulls, target_respect_nulls).second)
throw Exception(

View File

@ -20,6 +20,7 @@
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <Common/thread_local_rng.h>
#include <AggregateFunctions/IAggregateFunction.h>

View File

@ -1,7 +1,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <AggregateFunctions/findNumeric.h>
namespace DB
{
@ -10,10 +10,122 @@ struct Settings;
namespace
{
template <typename Data>
class AggregateFunctionsSingleValueMax final : public AggregateFunctionsSingleValue<Data>
{
using Parent = AggregateFunctionsSingleValue<Data>;
public:
explicit AggregateFunctionsSingleValueMax(const DataTypePtr & type) : Parent(type) { }
/// Specializations for native numeric types
ALWAYS_INLINE inline void addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
Arena * arena,
ssize_t if_argument_pos) const override;
ALWAYS_INLINE inline void addBatchSinglePlaceNotNull(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
const UInt8 * __restrict null_map,
Arena * arena,
ssize_t if_argument_pos) const override;
};
// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
template <> \
void AggregateFunctionsSingleValueMax<typename DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlace( \
size_t row_begin, \
size_t row_end, \
AggregateDataPtr __restrict place, \
const IColumn ** __restrict columns, \
Arena *, \
ssize_t if_argument_pos) const \
{ \
const auto & column = assert_cast<const DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>::ColVecType &>(*columns[0]); \
std::optional<TYPE> opt; \
if (if_argument_pos >= 0) \
{ \
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData(); \
opt = findNumericMaxIf(column.getData().data(), flags.data(), row_begin, row_end); \
} \
else \
opt = findNumericMax(column.getData().data(), row_begin, row_end); \
if (opt.has_value()) \
this->data(place).changeIfGreater(opt.value()); \
}
// NOLINTEND(bugprone-macro-parentheses)
FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE
template <typename Data>
void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
Arena * arena,
ssize_t if_argument_pos) const
{
return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
}
// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
template <> \
void AggregateFunctionsSingleValueMax<typename DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlaceNotNull( \
size_t row_begin, \
size_t row_end, \
AggregateDataPtr __restrict place, \
const IColumn ** __restrict columns, \
const UInt8 * __restrict null_map, \
Arena *, \
ssize_t if_argument_pos) const \
{ \
const auto & column = assert_cast<const DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>::ColVecType &>(*columns[0]); \
std::optional<TYPE> opt; \
if (if_argument_pos >= 0) \
{ \
const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data(); \
auto final_flags = std::make_unique<UInt8[]>(row_end); \
for (size_t i = row_begin; i < row_end; ++i) \
final_flags[i] = (!null_map[i]) & !!if_flags[i]; \
opt = findNumericMaxIf(column.getData().data(), final_flags.get(), row_begin, row_end); \
} \
else \
opt = findNumericMaxNotNull(column.getData().data(), null_map, row_begin, row_end); \
if (opt.has_value()) \
this->data(place).changeIfGreater(opt.value()); \
}
// NOLINTEND(bugprone-macro-parentheses)
FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE
template <typename Data>
void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
const UInt8 * __restrict null_map,
Arena * arena,
ssize_t if_argument_pos) const
{
return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
}
AggregateFunctionPtr createAggregateFunctionMax(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionMaxData>(name, argument_types, parameters, settings));
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValueMax, AggregateFunctionMaxData>(name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionArgMax(

View File

@ -1,6 +1,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <AggregateFunctions/findNumeric.h>
namespace DB
@ -10,10 +11,123 @@ struct Settings;
namespace
{
template <typename Data>
class AggregateFunctionsSingleValueMin final : public AggregateFunctionsSingleValue<Data>
{
using Parent = AggregateFunctionsSingleValue<Data>;
public:
explicit AggregateFunctionsSingleValueMin(const DataTypePtr & type) : Parent(type) { }
/// Specializations for native numeric types
ALWAYS_INLINE inline void addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
Arena * arena,
ssize_t if_argument_pos) const override;
ALWAYS_INLINE inline void addBatchSinglePlaceNotNull(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
const UInt8 * __restrict null_map,
Arena * arena,
ssize_t if_argument_pos) const override;
};
// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
template <> \
void AggregateFunctionsSingleValueMin<typename DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlace( \
size_t row_begin, \
size_t row_end, \
AggregateDataPtr __restrict place, \
const IColumn ** __restrict columns, \
Arena *, \
ssize_t if_argument_pos) const \
{ \
const auto & column = assert_cast<const DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>::ColVecType &>(*columns[0]); \
std::optional<TYPE> opt; \
if (if_argument_pos >= 0) \
{ \
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData(); \
opt = findNumericMinIf(column.getData().data(), flags.data(), row_begin, row_end); \
} \
else \
opt = findNumericMin(column.getData().data(), row_begin, row_end); \
if (opt.has_value()) \
this->data(place).changeIfLess(opt.value()); \
}
// NOLINTEND(bugprone-macro-parentheses)
FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE
template <typename Data>
void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
Arena * arena,
ssize_t if_argument_pos) const
{
return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
}
// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
template <> \
void AggregateFunctionsSingleValueMin<typename DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlaceNotNull( \
size_t row_begin, \
size_t row_end, \
AggregateDataPtr __restrict place, \
const IColumn ** __restrict columns, \
const UInt8 * __restrict null_map, \
Arena *, \
ssize_t if_argument_pos) const \
{ \
const auto & column = assert_cast<const DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>::ColVecType &>(*columns[0]); \
std::optional<TYPE> opt; \
if (if_argument_pos >= 0) \
{ \
const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data(); \
auto final_flags = std::make_unique<UInt8[]>(row_end); \
for (size_t i = row_begin; i < row_end; ++i) \
final_flags[i] = (!null_map[i]) & !!if_flags[i]; \
opt = findNumericMinIf(column.getData().data(), final_flags.get(), row_begin, row_end); \
} \
else \
opt = findNumericMinNotNull(column.getData().data(), null_map, row_begin, row_end); \
if (opt.has_value()) \
this->data(place).changeIfLess(opt.value()); \
}
// NOLINTEND(bugprone-macro-parentheses)
FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE
template <typename Data>
void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(
size_t row_begin,
size_t row_end,
AggregateDataPtr __restrict place,
const IColumn ** __restrict columns,
const UInt8 * __restrict null_map,
Arena * arena,
ssize_t if_argument_pos) const
{
return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
}
AggregateFunctionPtr createAggregateFunctionMin(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionMinData>(name, argument_types, parameters, settings));
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValueMin, AggregateFunctionMinData>(
name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionArgMin(

View File

@ -43,14 +43,12 @@ namespace ErrorCodes
template <typename T>
struct SingleValueDataFixed
{
private:
using Self = SingleValueDataFixed;
using ColVecType = ColumnVectorOrDecimal<T>;
bool has_value = false; /// We need to remember if at least one value has been passed. This is necessary for AggregateFunctionIf.
T value = T{};
public:
static constexpr bool result_is_nullable = false;
static constexpr bool should_skip_null_arguments = true;
static constexpr bool is_any = false;
@ -157,6 +155,15 @@ public:
return false;
}
void changeIfLess(T from)
{
if (!has() || from < value)
{
has_value = true;
value = from;
}
}
bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena)
{
if (!has() || assert_cast<const ColVecType &>(column).getData()[row_num] > value)
@ -179,6 +186,15 @@ public:
return false;
}
void changeIfGreater(T & from)
{
if (!has() || from > value)
{
has_value = true;
value = from;
}
}
bool isEqualTo(const Self & to) const
{
return has() && to.value == value;
@ -448,7 +464,6 @@ public:
}
#endif
};
struct Compatibility
@ -1214,7 +1229,7 @@ struct AggregateFunctionAnyHeavyData : Data
template <typename Data>
class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data>>
class AggregateFunctionsSingleValue : public IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data>>
{
static constexpr bool is_any = Data::is_any;
@ -1230,8 +1245,11 @@ public:
|| StringRef(Data::name()) == StringRef("max"))
{
if (!type->isComparable())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of aggregate function {} "
"because the values of that data type are not comparable", type->getName(), getName());
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of aggregate function {} because the values of that data type are not comparable",
type->getName(),
Data::name());
}
}

View File

@ -378,6 +378,7 @@ AggregateFunctionPtr createAggregateFunctionSparkbar(const std::string & name, c
void registerAggregateFunctionSparkbar(AggregateFunctionFactory & factory)
{
factory.registerFunction("sparkbar", createAggregateFunctionSparkbar);
factory.registerAlias("sparkBar", "sparkbar");
}
}

View File

@ -504,7 +504,7 @@ public:
const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data();
auto final_flags = std::make_unique<UInt8[]>(row_end);
for (size_t i = row_begin; i < row_end; ++i)
final_flags[i] = (!null_map[i]) & if_flags[i];
final_flags[i] = (!null_map[i]) & !!if_flags[i];
this->data(place).addManyConditional(column.getData().data(), final_flags.get(), row_begin, row_end);
}

View File

@ -197,7 +197,7 @@ public:
virtual void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const
{
if (isState())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} is marked as State but method insertMergeResultInto is not implemented");
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} is marked as State but method insertMergeResultInto is not implemented", getName());
insertResultInto(place, to, arena);
}

View File

@ -1,6 +1,7 @@
#include <AggregateFunctions/QuantileTDigest.h>
#include <IO/WriteBufferFromString.h>
#include <IO/ReadBufferFromString.h>
#include <iostream>
int main(int, char **)
{

View File

@ -0,0 +1,15 @@
#include <AggregateFunctions/findNumeric.h>
namespace DB
{
#define INSTANTIATION(T) \
template std::optional<T> findNumericMin(const T * __restrict ptr, size_t start, size_t end); \
template std::optional<T> findNumericMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
template std::optional<T> findNumericMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
template std::optional<T> findNumericMax(const T * __restrict ptr, size_t start, size_t end); \
template std::optional<T> findNumericMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
template std::optional<T> findNumericMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
FOR_BASIC_NUMERIC_TYPES(INSTANTIATION)
#undef INSTANTIATION
}

View File

@ -0,0 +1,154 @@
#pragma once
#include <DataTypes/IDataType.h>
#include <base/defines.h>
#include <base/types.h>
#include <Common/Concepts.h>
#include <Common/TargetSpecific.h>
#include <algorithm>
#include <optional>
namespace DB
{
template <typename T>
concept is_any_native_number = (is_any_of<T, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64>);
template <is_any_native_number T>
struct MinComparator
{
static ALWAYS_INLINE inline const T & cmp(const T & a, const T & b) { return std::min(a, b); }
};
template <is_any_native_number T>
struct MaxComparator
{
static ALWAYS_INLINE inline const T & cmp(const T & a, const T & b) { return std::max(a, b); }
};
MULTITARGET_FUNCTION_AVX2_SSE42(
MULTITARGET_FUNCTION_HEADER(template <is_any_native_number T, typename ComparatorClass, bool add_all_elements, bool add_if_cond_zero> static std::optional<T> NO_INLINE),
findNumericExtremeImpl,
MULTITARGET_FUNCTION_BODY((const T * __restrict ptr, const UInt8 * __restrict condition_map [[maybe_unused]], size_t row_begin, size_t row_end)
{
size_t count = row_end - row_begin;
ptr += row_begin;
if constexpr (!add_all_elements)
condition_map += row_begin;
T ret{};
size_t i = 0;
for (; i < count; i++)
{
if (add_all_elements || !condition_map[i] == add_if_cond_zero)
{
ret = ptr[i];
break;
}
}
if (i >= count)
return std::nullopt;
/// Unroll the loop manually for floating point, since the compiler doesn't do it without fastmath
/// as it might change the return value
if constexpr (std::is_floating_point_v<T>)
{
constexpr size_t unroll_block = 512 / sizeof(T); /// Chosen via benchmarks with AVX2 so YMMV
size_t unrolled_end = i + (((count - i) / unroll_block) * unroll_block);
if (i < unrolled_end)
{
T partial_min[unroll_block];
for (size_t unroll_it = 0; unroll_it < unroll_block; unroll_it++)
partial_min[unroll_it] = ret;
while (i < unrolled_end)
{
for (size_t unroll_it = 0; unroll_it < unroll_block; unroll_it++)
{
if (add_all_elements || !condition_map[i + unroll_it] == add_if_cond_zero)
partial_min[unroll_it] = ComparatorClass::cmp(partial_min[unroll_it], ptr[i + unroll_it]);
}
i += unroll_block;
}
for (size_t unroll_it = 0; unroll_it < unroll_block; unroll_it++)
ret = ComparatorClass::cmp(ret, partial_min[unroll_it]);
}
}
for (; i < count; i++)
{
if (add_all_elements || !condition_map[i] == add_if_cond_zero)
ret = ComparatorClass::cmp(ret, ptr[i]);
}
return ret;
}
))
/// Given a vector of T finds the extreme (MIN or MAX) value
template <is_any_native_number T, class ComparatorClass, bool add_all_elements, bool add_if_cond_zero>
static std::optional<T>
findNumericExtreme(const T * __restrict ptr, const UInt8 * __restrict condition_map [[maybe_unused]], size_t start, size_t end)
{
#if USE_MULTITARGET_CODE
/// We see no benefit from using AVX512BW or AVX512F (over AVX2), so we only declare SSE and AVX2
if (isArchSupported(TargetArch::AVX2))
return findNumericExtremeImplAVX2<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
if (isArchSupported(TargetArch::SSE42))
return findNumericExtremeImplSSE42<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
#endif
return findNumericExtremeImpl<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
}
template <is_any_native_number T>
std::optional<T> findNumericMin(const T * __restrict ptr, size_t start, size_t end)
{
return findNumericExtreme<T, MinComparator<T>, true, false>(ptr, nullptr, start, end);
}
template <is_any_native_number T>
std::optional<T> findNumericMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
{
return findNumericExtreme<T, MinComparator<T>, false, true>(ptr, condition_map, start, end);
}
template <is_any_native_number T>
std::optional<T> findNumericMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
{
return findNumericExtreme<T, MinComparator<T>, false, false>(ptr, condition_map, start, end);
}
template <is_any_native_number T>
std::optional<T> findNumericMax(const T * __restrict ptr, size_t start, size_t end)
{
return findNumericExtreme<T, MaxComparator<T>, true, false>(ptr, nullptr, start, end);
}
template <is_any_native_number T>
std::optional<T> findNumericMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
{
return findNumericExtreme<T, MaxComparator<T>, false, true>(ptr, condition_map, start, end);
}
template <is_any_native_number T>
std::optional<T> findNumericMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
{
return findNumericExtreme<T, MaxComparator<T>, false, false>(ptr, condition_map, start, end);
}
#define EXTERN_INSTANTIATION(T) \
extern template std::optional<T> findNumericMin(const T * __restrict ptr, size_t start, size_t end); \
extern template std::optional<T> findNumericMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
extern template std::optional<T> findNumericMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
extern template std::optional<T> findNumericMax(const T * __restrict ptr, size_t start, size_t end); \
extern template std::optional<T> findNumericMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
extern template std::optional<T> findNumericMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
FOR_BASIC_NUMERIC_TYPES(EXTERN_INSTANTIATION)
#undef EXTERN_INSTANTIATION
}

View File

@ -536,7 +536,8 @@ CNF CNF::toCNF(const QueryTreeNodePtr & node, ContextPtr context, size_t max_gro
if (!cnf)
throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS,
"Cannot convert expression '{}' to CNF, because it produces to many clauses."
"Size of boolean formula in CNF can be exponential of size of source formula.");
"Size of boolean formula in CNF can be exponential of size of source formula.",
node->formatConvertedASTForErrorMessage());
return *cnf;
}

View File

@ -4,6 +4,7 @@
#include <Analyzer/FunctionNode.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/Utils.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeEnum.h>
@ -41,22 +42,6 @@ DataTypePtr getEnumType(const std::set<std::string> & string_values)
return getDataEnumType<DataTypeEnum8>(string_values);
}
QueryTreeNodePtr createCastFunction(QueryTreeNodePtr from, DataTypePtr result_type, ContextPtr context)
{
auto enum_literal = std::make_shared<ConstantValue>(result_type->getName(), std::make_shared<DataTypeString>());
auto enum_literal_node = std::make_shared<ConstantNode>(std::move(enum_literal));
auto cast_function = FunctionFactory::instance().get("_CAST", std::move(context));
QueryTreeNodes arguments{ std::move(from), std::move(enum_literal_node) };
auto function_node = std::make_shared<FunctionNode>("_CAST");
function_node->getArguments().getNodes() = std::move(arguments);
function_node->resolveAsFunction(cast_function->build(function_node->getArgumentColumns()));
return function_node;
}
/// if(arg1, arg2, arg3) will be transformed to if(arg1, _CAST(arg2, Enum...), _CAST(arg3, Enum...))
/// where Enum is generated based on the possible values stored in string_values
void changeIfArguments(

View File

@ -9,6 +9,8 @@
#include <Analyzer/HashUtils.h>
#include <Analyzer/Utils.h>
#include <DataTypes/DataTypeLowCardinality.h>
namespace DB
{
@ -248,13 +250,13 @@ public:
if (function_node->getFunctionName() == "and")
{
tryReplaceAndEqualsChainsWithConstant(node);
tryOptimizeAndEqualsNotEqualsChain(node);
return;
}
}
private:
void tryReplaceAndEqualsChainsWithConstant(QueryTreeNodePtr & node)
void tryOptimizeAndEqualsNotEqualsChain(QueryTreeNodePtr & node)
{
auto & function_node = node->as<FunctionNode &>();
assert(function_node.getFunctionName() == "and");
@ -264,53 +266,132 @@ private:
QueryTreeNodes and_operands;
QueryTreeNodePtrWithHashMap<const ConstantNode *> node_to_constants;
QueryTreeNodePtrWithHashMap<const ConstantNode *> equals_node_to_constants;
QueryTreeNodePtrWithHashMap<QueryTreeNodeConstRawPtrWithHashSet> not_equals_node_to_constants;
QueryTreeNodePtrWithHashMap<QueryTreeNodes> node_to_not_equals_functions;
for (const auto & argument : function_node.getArguments())
{
auto * argument_function = argument->as<FunctionNode>();
if (!argument_function || argument_function->getFunctionName() != "equals")
const auto valid_functions = std::unordered_set<std::string>{"equals", "notEquals"};
if (!argument_function || !valid_functions.contains(argument_function->getFunctionName()))
{
and_operands.push_back(argument);
continue;
}
const auto & equals_arguments = argument_function->getArguments().getNodes();
const auto & lhs = equals_arguments[0];
const auto & rhs = equals_arguments[1];
const auto function_name = argument_function->getFunctionName();
const auto & function_arguments = argument_function->getArguments().getNodes();
const auto & lhs = function_arguments[0];
const auto & rhs = function_arguments[1];
const auto has_and_with_different_constant = [&](const QueryTreeNodePtr & expression, const ConstantNode * constant)
if (function_name == "equals")
{
if (auto it = node_to_constants.find(expression); it != node_to_constants.end())
const auto has_and_with_different_constant = [&](const QueryTreeNodePtr & expression, const ConstantNode * constant)
{
if (!it->second->isEqual(*constant))
return true;
if (auto it = equals_node_to_constants.find(expression); it != equals_node_to_constants.end())
{
if (!it->second->isEqual(*constant))
return true;
}
else
{
equals_node_to_constants.emplace(expression, constant);
and_operands.push_back(argument);
}
return false;
};
bool collapse_to_false = false;
if (const auto * lhs_literal = lhs->as<ConstantNode>())
collapse_to_false = has_and_with_different_constant(rhs, lhs_literal);
else if (const auto * rhs_literal = rhs->as<ConstantNode>())
collapse_to_false = has_and_with_different_constant(lhs, rhs_literal);
else
and_operands.push_back(argument);
if (collapse_to_false)
{
auto false_value = std::make_shared<ConstantValue>(0u, function_node.getResultType());
auto false_node = std::make_shared<ConstantNode>(std::move(false_value));
node = std::move(false_node);
return;
}
}
else if (function_name == "notEquals")
{
/// collect all inequality checks (x <> value)
const auto add_not_equals_function_if_not_present = [&](const auto & expression_node, const ConstantNode * constant)
{
auto & constant_set = not_equals_node_to_constants[expression_node];
if (!constant_set.contains(constant))
{
constant_set.insert(constant);
node_to_not_equals_functions[expression_node].push_back(argument);
}
};
if (const auto * lhs_literal = lhs->as<ConstantNode>();
lhs_literal && !lhs_literal->getValue().isNull())
add_not_equals_function_if_not_present(rhs, lhs_literal);
else if (const auto * rhs_literal = rhs->as<ConstantNode>();
rhs_literal && !rhs_literal->getValue().isNull())
add_not_equals_function_if_not_present(lhs, rhs_literal);
else
and_operands.push_back(argument);
}
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function name: '{}'", function_name);
}
auto not_in_function_resolver = FunctionFactory::instance().get("notIn", getContext());
for (auto & [expression, not_equals_functions] : node_to_not_equals_functions)
{
const auto & settings = getSettings();
if (not_equals_functions.size() < settings.optimize_min_inequality_conjunction_chain_length && !expression.node->getResultType()->lowCardinality())
{
std::move(not_equals_functions.begin(), not_equals_functions.end(), std::back_inserter(and_operands));
continue;
}
Tuple args;
args.reserve(not_equals_functions.size());
/// first we create tuple from RHS of notEquals functions
for (const auto & not_equals : not_equals_functions)
{
const auto * not_equals_function = not_equals->as<FunctionNode>();
assert(not_equals_function && not_equals_function->getFunctionName() == "notEquals");
const auto & not_equals_arguments = not_equals_function->getArguments().getNodes();
if (const auto * rhs_literal = not_equals_arguments[1]->as<ConstantNode>())
{
args.push_back(rhs_literal->getValue());
}
else
{
node_to_constants.emplace(expression, constant);
and_operands.push_back(argument);
const auto * lhs_literal = not_equals_arguments[0]->as<ConstantNode>();
assert(lhs_literal);
args.push_back(lhs_literal->getValue());
}
return false;
};
bool collapse_to_false = false;
if (const auto * lhs_literal = lhs->as<ConstantNode>())
collapse_to_false = has_and_with_different_constant(rhs, lhs_literal);
else if (const auto * rhs_literal = rhs->as<ConstantNode>())
collapse_to_false = has_and_with_different_constant(lhs, rhs_literal);
else
and_operands.push_back(argument);
if (collapse_to_false)
{
auto false_value = std::make_shared<ConstantValue>(0u, function_node.getResultType());
auto false_node = std::make_shared<ConstantNode>(std::move(false_value));
node = std::move(false_node);
return;
}
auto rhs_node = std::make_shared<ConstantNode>(std::move(args));
auto not_in_function = std::make_shared<FunctionNode>("notIn");
QueryTreeNodes not_in_arguments;
not_in_arguments.reserve(2);
not_in_arguments.push_back(expression.node);
not_in_arguments.push_back(std::move(rhs_node));
not_in_function->getArguments().getNodes() = std::move(not_in_arguments);
not_in_function->resolveAsFunction(not_in_function_resolver);
and_operands.push_back(std::move(not_in_function));
}
if (and_operands.size() == function_node.getArguments().getNodes().size())
@ -320,11 +401,21 @@ private:
{
/// AND operator can have UInt8 or bool as its type.
/// bool is used if a bool constant is at least one operand.
/// Because we reduce the number of operands here by eliminating the same equality checks,
/// the only situation we can end up here is we had AND check where all the equality checks are the same so we know the type is UInt8.
/// Otherwise, we will have > 1 operands and we don't have to do anything.
assert(!function_node.getResultType()->isNullable() && and_operands[0]->getResultType()->equals(*function_node.getResultType()));
node = std::move(and_operands[0]);
auto operand_type = and_operands[0]->getResultType();
auto function_type = function_node.getResultType();
assert(!function_type->isNullable());
if (!function_type->equals(*operand_type))
{
/// Result of equality operator can be low cardinality, while AND always returns UInt8.
/// In that case we replace `(lc = 1) AND (lc = 1)` with `(lc = 1) AS UInt8`
assert(function_type->equals(*removeLowCardinality(operand_type)));
node = createCastFunction(std::move(and_operands[0]), function_type, getContext());
}
else
{
node = std::move(and_operands[0]);
}
return;
}
@ -389,11 +480,14 @@ private:
continue;
}
bool is_any_nullable = false;
Tuple args;
args.reserve(equals_functions.size());
/// first we create tuple from RHS of equals functions
for (const auto & equals : equals_functions)
{
is_any_nullable |= equals->getResultType()->isNullable();
const auto * equals_function = equals->as<FunctionNode>();
assert(equals_function && equals_function->getFunctionName() == "equals");
@ -421,8 +515,20 @@ private:
in_function->getArguments().getNodes() = std::move(in_arguments);
in_function->resolveAsFunction(in_function_resolver);
or_operands.push_back(std::move(in_function));
/** For `k :: UInt8`, expression `k = 1 OR k = NULL` with result type Nullable(UInt8)
* is replaced with `k IN (1, NULL)` with result type UInt8.
* Convert it back to Nullable(UInt8).
*/
if (is_any_nullable && !in_function->getResultType()->isNullable())
{
auto nullable_result_type = std::make_shared<DataTypeNullable>(in_function->getResultType());
auto in_function_nullable = createCastFunction(std::move(in_function), std::move(nullable_result_type), getContext());
or_operands.push_back(std::move(in_function_nullable));
}
else
{
or_operands.push_back(std::move(in_function));
}
}
if (or_operands.size() == function_node.getArguments().getNodes().size())

View File

@ -68,7 +68,25 @@ namespace DB
* WHERE a = 1 AND b = 'test';
* -------------------------------
*
* 5. Remove unnecessary IS NULL checks in JOIN ON clause
* 5. Replaces chains of inequality functions inside an AND with a single NOT IN operator.
* The replacement is done if:
* - one of the operands of the inequality function is a constant
* - length of chain is at least 'optimize_min_inequality_conjunction_chain_length' long OR the expression has type of LowCardinality
*
* E.g. (optimize_min_inequality_conjunction_chain_length = 2)
* -------------------------------
* SELECT *
* FROM table
* WHERE a <> 1 AND a <> 2;
*
* will be transformed into
*
* SELECT *
* FROM TABLE
* WHERE a NOT IN (1, 2);
* -------------------------------
*
* 6. Remove unnecessary IS NULL checks in JOIN ON clause
* - equality check with explicit IS NULL check replaced with <=> operator
* -------------------------------
* SELECT * FROM t1 JOIN t2 ON a = b OR (a IS NULL AND b IS NULL)
@ -85,7 +103,11 @@ class LogicalExpressionOptimizerPass final : public IQueryTreePass
public:
String getName() override { return "LogicalExpressionOptimizer"; }
String getDescription() override { return "Transform equality chain to a single IN function or a constant if possible"; }
String getDescription() override
{
return "Transforms chains of logical expressions if possible, i.e. "
"replace chains of equality functions inside an OR with a single IN operator";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};

View File

@ -667,4 +667,20 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node)
return out;
}
QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context)
{
auto enum_literal = std::make_shared<ConstantValue>(result_type->getName(), std::make_shared<DataTypeString>());
auto enum_literal_node = std::make_shared<ConstantNode>(std::move(enum_literal));
auto cast_function = FunctionFactory::instance().get("_CAST", std::move(context));
QueryTreeNodes arguments{ std::move(node), std::move(enum_literal_node) };
auto function_node = std::make_shared<FunctionNode>("_CAST");
function_node->getArguments().getNodes() = std::move(arguments);
function_node->resolveAsFunction(cast_function->build(function_node->getArgumentColumns()));
return function_node;
}
}

View File

@ -99,4 +99,7 @@ void rerunFunctionResolve(FunctionNode * function_node, ContextPtr context);
/// Just collect all identifiers from query tree
NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node);
/// Wrap node into `_CAST` function
QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context);
}

View File

@ -88,18 +88,19 @@ BackupEntriesCollector::BackupEntriesCollector(
, read_settings(read_settings_)
, context(context_)
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
, collect_metadata_timeout(context->getConfigRef().getUInt64("backups.collect_metadata_timeout", context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000)))
, collect_metadata_timeout(context->getConfigRef().getUInt64(
"backups.collect_metadata_timeout", context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000)))
, attempts_to_collect_metadata_before_sleep(context->getConfigRef().getUInt("backups.attempts_to_collect_metadata_before_sleep", 2))
, min_sleep_before_next_attempt_to_collect_metadata(context->getConfigRef().getUInt64("backups.min_sleep_before_next_attempt_to_collect_metadata", 100))
, max_sleep_before_next_attempt_to_collect_metadata(context->getConfigRef().getUInt64("backups.max_sleep_before_next_attempt_to_collect_metadata", 5000))
, min_sleep_before_next_attempt_to_collect_metadata(
context->getConfigRef().getUInt64("backups.min_sleep_before_next_attempt_to_collect_metadata", 100))
, max_sleep_before_next_attempt_to_collect_metadata(
context->getConfigRef().getUInt64("backups.max_sleep_before_next_attempt_to_collect_metadata", 5000))
, compare_collected_metadata(context->getConfigRef().getBool("backups.compare_collected_metadata", true))
, log(&Poco::Logger::get("BackupEntriesCollector"))
, global_zookeeper_retries_info(
"BackupEntriesCollector",
log,
context->getSettingsRef().backup_restore_keeper_max_retries,
context->getSettingsRef().backup_restore_keeper_retry_initial_backoff_ms,
context->getSettingsRef().backup_restore_keeper_retry_max_backoff_ms)
context->getSettingsRef().backup_restore_keeper_max_retries,
context->getSettingsRef().backup_restore_keeper_retry_initial_backoff_ms,
context->getSettingsRef().backup_restore_keeper_retry_max_backoff_ms)
, threadpool(threadpool_)
{
}
@ -572,7 +573,7 @@ std::vector<std::pair<ASTPtr, StoragePtr>> BackupEntriesCollector::findTablesInD
{
/// Database or table could be replicated - so may use ZooKeeper. We need to retry.
auto zookeeper_retries_info = global_zookeeper_retries_info;
ZooKeeperRetriesControl retries_ctl("getTablesForBackup", zookeeper_retries_info, nullptr);
ZooKeeperRetriesControl retries_ctl("getTablesForBackup", log, zookeeper_retries_info, nullptr);
retries_ctl.retryLoop([&](){ db_tables = database->getTablesForBackup(filter_by_table_name, context); });
}
catch (Exception & e)

View File

@ -68,7 +68,6 @@ namespace
client_configuration.connectTimeoutMs = 10 * 1000;
/// Requests in backups can be extremely long, set to one hour
client_configuration.requestTimeoutMs = 60 * 60 * 1000;
client_configuration.retryStrategy = std::make_shared<Aws::Client::DefaultRetryStrategy>(request_settings.retry_attempts);
return S3::ClientFactory::instance().create(
client_configuration,
@ -146,7 +145,7 @@ UInt64 BackupReaderS3::getFileSize(const String & file_name)
{
auto objects = listObjects(*client, s3_uri, file_name);
if (objects.empty())
throw Exception(ErrorCodes::S3_ERROR, "Object {} must exist");
throw Exception(ErrorCodes::S3_ERROR, "Object {} must exist", file_name);
return objects[0].GetSize();
}
@ -299,7 +298,7 @@ UInt64 BackupWriterS3::getFileSize(const String & file_name)
{
auto objects = listObjects(*client, s3_uri, file_name);
if (objects.empty())
throw Exception(ErrorCodes::S3_ERROR, "Object {} must exist");
throw Exception(ErrorCodes::S3_ERROR, "Object {} must exist", file_name);
return objects[0].GetSize();
}

View File

@ -157,11 +157,16 @@ BackupImpl::~BackupImpl()
void BackupImpl::open()
{
std::lock_guard lock{mutex};
LOG_INFO(log, "{} backup: {}", ((open_mode == OpenMode::WRITE) ? "Writing" : "Reading"), backup_name_for_logging);
ProfileEvents::increment((open_mode == OpenMode::WRITE) ? ProfileEvents::BackupsOpenedForWrite : ProfileEvents::BackupsOpenedForRead);
if (open_mode == OpenMode::WRITE)
if (open_mode == OpenMode::READ)
{
ProfileEvents::increment(ProfileEvents::BackupsOpenedForRead);
LOG_INFO(log, "Reading backup: {}", backup_name_for_logging);
}
else
{
ProfileEvents::increment(ProfileEvents::BackupsOpenedForWrite);
LOG_INFO(log, "Writing backup: {}", backup_name_for_logging);
timestamp = std::time(nullptr);
if (!uuid)
uuid = UUIDHelpers::generateV4();
@ -189,7 +194,7 @@ void BackupImpl::open()
void BackupImpl::close()
{
std::lock_guard lock{mutex};
closeArchive();
closeArchive(/* finalize= */ false);
if (!is_internal_backup && writer && !writing_finalized)
removeAllFilesAfterFailure();
@ -222,8 +227,11 @@ void BackupImpl::openArchive()
}
}
void BackupImpl::closeArchive()
void BackupImpl::closeArchive(bool finalize)
{
if (finalize && archive_writer)
archive_writer->finalize();
archive_reader.reset();
archive_writer.reset();
}
@ -978,7 +986,7 @@ void BackupImpl::finalizeWriting()
{
LOG_TRACE(log, "Finalizing backup {}", backup_name_for_logging);
writeBackupMetadata();
closeArchive();
closeArchive(/* finalize= */ true);
setCompressedSize();
removeLockFile();
LOG_TRACE(log, "Finalized backup {}", backup_name_for_logging);

View File

@ -89,7 +89,7 @@ private:
void close();
void openArchive();
void closeArchive();
void closeArchive(bool finalize);
/// Writes the file ".backup" containing backup's metadata.
void writeBackupMetadata() TSA_REQUIRES(mutex);

View File

@ -78,13 +78,16 @@ BackupInfo BackupInfo::fromAST(const IAST & ast)
}
}
res.args.reserve(list->children.size() - index);
for (; index < list->children.size(); ++index)
size_t args_size = list->children.size();
res.args.reserve(args_size - index);
for (; index < args_size; ++index)
{
const auto & elem = list->children[index];
const auto * lit = elem->as<const ASTLiteral>();
if (!lit)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected literal, got {}", serializeAST(*elem));
}
res.args.push_back(lit->value);
}
}

View File

@ -17,6 +17,9 @@ struct BackupOperationInfo
/// Operation name, a string like "Disk('backups', 'my_backup')"
String name;
/// Base Backup Operation name, a string like "Disk('backups', 'my_base_backup')"
String base_backup_name;
/// This operation is internal and should not be shown in system.backups
bool internal = false;

View File

@ -394,9 +394,13 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
String backup_name_for_logging = backup_info.toStringForLogging();
String base_backup_name;
if (backup_settings.base_backup_info)
base_backup_name = backup_settings.base_backup_info->toString();
try
{
addInfo(backup_id, backup_name_for_logging, backup_settings.internal, BackupStatus::CREATING_BACKUP);
addInfo(backup_id, backup_name_for_logging, base_backup_name, backup_settings.internal, BackupStatus::CREATING_BACKUP);
/// Prepare context to use.
ContextPtr context_in_use = context;
@ -606,7 +610,6 @@ void BackupsWorker::doBackup(
void BackupsWorker::buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, const ReadSettings & read_settings, std::shared_ptr<IBackupCoordination> backup_coordination)
{
LOG_TRACE(log, "{}", Stage::BUILDING_FILE_INFOS);
backup_coordination->setStage(Stage::BUILDING_FILE_INFOS, "");
backup_coordination->waitForStage(Stage::BUILDING_FILE_INFOS);
backup_coordination->addFileInfos(::DB::buildFileInfosForBackupEntries(backup_entries, backup->getBaseBackup(), read_settings, getThreadPool(ThreadPoolId::BACKUP_MAKE_FILES_LIST)));
@ -745,8 +748,11 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
{
auto backup_info = BackupInfo::fromAST(*restore_query->backup_name);
String backup_name_for_logging = backup_info.toStringForLogging();
String base_backup_name;
if (restore_settings.base_backup_info)
base_backup_name = restore_settings.base_backup_info->toString();
addInfo(restore_id, backup_name_for_logging, restore_settings.internal, BackupStatus::RESTORING);
addInfo(restore_id, backup_name_for_logging, base_backup_name, restore_settings.internal, BackupStatus::RESTORING);
/// Prepare context to use.
ContextMutablePtr context_in_use = context;
@ -1005,11 +1011,12 @@ void BackupsWorker::restoreTablesData(const OperationID & restore_id, BackupPtr
}
void BackupsWorker::addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status)
void BackupsWorker::addInfo(const OperationID & id, const String & name, const String & base_backup_name, bool internal, BackupStatus status)
{
BackupOperationInfo info;
info.id = id;
info.name = name;
info.base_backup_name = base_backup_name;
info.internal = internal;
info.status = status;
info.start_time = std::chrono::system_clock::now();

View File

@ -83,7 +83,7 @@ private:
/// Run data restoring tasks which insert data to tables.
void restoreTablesData(const BackupOperationID & restore_id, BackupPtr backup, DataRestoreTasks && tasks, ThreadPool & thread_pool);
void addInfo(const BackupOperationID & id, const String & name, bool internal, BackupStatus status);
void addInfo(const BackupOperationID & id, const String & name, const String & base_backup_name, bool internal, BackupStatus status);
void setStatus(const BackupOperationID & id, BackupStatus status, bool throw_if_error = true);
void setStatusSafe(const String & id, BackupStatus status) { setStatus(id, status, false); }
void setNumFilesAndSize(const BackupOperationID & id, size_t num_files, UInt64 total_size, size_t num_entries,

View File

@ -43,14 +43,6 @@ namespace Stage = BackupCoordinationStage;
namespace
{
/// Uppercases the first character of a passed string.
String toUpperFirst(const String & str)
{
String res = str;
res[0] = std::toupper(res[0]);
return res;
}
/// Outputs "table <name>" or "temporary table <name>"
String tableNameWithTypeToString(const String & database_name, const String & table_name, bool first_upper)
{
@ -145,7 +137,7 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode)
void RestorerFromBackup::setStage(const String & new_stage, const String & message)
{
LOG_TRACE(log, fmt::runtime(toUpperFirst(new_stage)));
LOG_TRACE(log, "Setting stage: {}", new_stage);
current_stage = new_stage;
if (restore_coordination)

View File

@ -20,22 +20,19 @@ WithRetries::KeeperSettings WithRetries::KeeperSettings::fromContext(ContextPtr
};
}
WithRetries::WithRetries(Poco::Logger * log_, zkutil::GetZooKeeper get_zookeeper_, const KeeperSettings & settings_, RenewerCallback callback_)
WithRetries::WithRetries(
Poco::Logger * log_, zkutil::GetZooKeeper get_zookeeper_, const KeeperSettings & settings_, RenewerCallback callback_)
: log(log_)
, get_zookeeper(get_zookeeper_)
, settings(settings_)
, callback(callback_)
, global_zookeeper_retries_info(
log->name(),
log,
settings.keeper_max_retries,
settings.keeper_retry_initial_backoff_ms,
settings.keeper_retry_max_backoff_ms)
settings.keeper_max_retries, settings.keeper_retry_initial_backoff_ms, settings.keeper_retry_max_backoff_ms)
{}
WithRetries::RetriesControlHolder::RetriesControlHolder(const WithRetries * parent, const String & name)
: info(parent->global_zookeeper_retries_info)
, retries_ctl(name, info, nullptr)
, retries_ctl(name, parent->log, info, nullptr)
, faulty_zookeeper(parent->getFaultyZooKeeper())
{}

View File

@ -226,6 +226,7 @@ add_object_library(clickhouse_storages_statistics Storages/Statistics)
add_object_library(clickhouse_storages_liveview Storages/LiveView)
add_object_library(clickhouse_storages_windowview Storages/WindowView)
add_object_library(clickhouse_storages_s3queue Storages/S3Queue)
add_object_library(clickhouse_storages_materializedview Storages/MaterializedView)
add_object_library(clickhouse_client Client)
add_object_library(clickhouse_bridge BridgeHelper)
add_object_library(clickhouse_server Server)

View File

@ -201,7 +201,7 @@ void LocalConnection::sendQuery(
catch (...)
{
state->io.onException();
state->exception = std::make_unique<Exception>(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception");
state->exception = std::make_unique<Exception>(Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception"));
}
}
@ -311,7 +311,7 @@ bool LocalConnection::poll(size_t)
catch (...)
{
state->io.onException();
state->exception = std::make_unique<Exception>(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception");
state->exception = std::make_unique<Exception>(Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception"));
}
}

View File

@ -48,7 +48,7 @@ Suggest::Suggest()
"GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", "IDENTIFIED", "HOST",
"NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", "INTERVAL",
"LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "CLEANUP", "APPEND",
"IGNORE NULLS", "RESPECT NULLS", "OVER"});
"IGNORE NULLS", "RESPECT NULLS", "OVER", "PASTE"});
}
static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggestion)

View File

@ -1,4 +1,5 @@
#include <Columns/ColumnCompressed.h>
#include <Common/formatReadable.h>
#pragma clang diagnostic ignored "-Wold-style-cast"

View File

@ -1,4 +1,5 @@
#include <Common/Arena.h>
#include <Core/Field.h>
#include <Columns/IColumnDummy.h>
#include <Columns/ColumnsCommon.h>

View File

@ -4,6 +4,7 @@
#include <vector>
#include <Columns/ColumnsNumber.h>
#include <Common/randomSeed.h>
#include <Common/thread_local_rng.h>
#include <gtest/gtest.h>
using namespace DB;

View File

@ -1,9 +1,190 @@
#include "Allocator.h"
#include <Common/Allocator.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
#include <Common/formatReadable.h>
#include <Common/CurrentMemoryTracker.h>
#include <base/errnoToString.h>
#include <base/getPageSize.h>
#include <Poco/Logger.h>
#include <sys/mman.h> /// MADV_POPULATE_WRITE
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int LOGICAL_ERROR;
}
}
namespace
{
using namespace DB;
#if defined(MADV_POPULATE_WRITE)
/// Address passed to madvise is required to be aligned to the page boundary.
auto adjustToPageSize(void * buf, size_t len, size_t page_size)
{
const uintptr_t address_numeric = reinterpret_cast<uintptr_t>(buf);
const size_t next_page_start = ((address_numeric + page_size - 1) / page_size) * page_size;
return std::make_pair(reinterpret_cast<void *>(next_page_start), len - (next_page_start - address_numeric));
}
#endif
void prefaultPages([[maybe_unused]] void * buf_, [[maybe_unused]] size_t len_)
{
#if defined(MADV_POPULATE_WRITE)
if (len_ < POPULATE_THRESHOLD)
return;
static const size_t page_size = ::getPageSize();
if (len_ < page_size) /// Rounded address should be still within [buf, buf + len).
return;
auto [buf, len] = adjustToPageSize(buf_, len_, page_size);
if (auto res = ::madvise(buf, len, MADV_POPULATE_WRITE); res < 0)
LOG_TRACE(
LogFrequencyLimiter(&Poco::Logger::get("Allocator"), 1),
"Attempt to populate pages failed: {} (EINVAL is expected for kernels < 5.14)",
errnoToString(res));
#endif
}
template <bool clear_memory, bool populate>
void * allocNoTrack(size_t size, size_t alignment)
{
void * buf;
if (alignment <= MALLOC_MIN_ALIGNMENT)
{
if constexpr (clear_memory)
buf = ::calloc(size, 1);
else
buf = ::malloc(size);
if (nullptr == buf)
throw DB::ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot malloc {}.", ReadableSize(size));
}
else
{
buf = nullptr;
int res = posix_memalign(&buf, alignment, size);
if (0 != res)
throw DB::ErrnoException(
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Cannot allocate memory (posix_memalign) {}.", ReadableSize(size));
if constexpr (clear_memory)
memset(buf, 0, size);
}
if constexpr (populate)
prefaultPages(buf, size);
return buf;
}
void freeNoTrack(void * buf)
{
::free(buf);
}
void checkSize(size_t size)
{
/// More obvious exception in case of possible overflow (instead of just "Cannot mmap").
if (size >= 0x8000000000000000ULL)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size);
}
}
/// Constant is chosen almost arbitrarily, what I observed is 128KB is too small, 1MB is almost indistinguishable from 64MB and 1GB is too large.
extern const size_t POPULATE_THRESHOLD = 16 * 1024 * 1024;
template <bool clear_memory_, bool populate>
void * Allocator<clear_memory_, populate>::alloc(size_t size, size_t alignment)
{
checkSize(size);
auto trace = CurrentMemoryTracker::alloc(size);
void * ptr = allocNoTrack<clear_memory_, populate>(size, alignment);
trace.onAlloc(ptr, size);
return ptr;
}
template <bool clear_memory_, bool populate>
void Allocator<clear_memory_, populate>::free(void * buf, size_t size)
{
try
{
checkSize(size);
freeNoTrack(buf);
auto trace = CurrentMemoryTracker::free(size);
trace.onFree(buf, size);
}
catch (...)
{
DB::tryLogCurrentException("Allocator::free");
throw;
}
}
template <bool clear_memory_, bool populate>
void * Allocator<clear_memory_, populate>::realloc(void * buf, size_t old_size, size_t new_size, size_t alignment)
{
checkSize(new_size);
if (old_size == new_size)
{
/// nothing to do.
/// BTW, it's not possible to change alignment while doing realloc.
}
else if (alignment <= MALLOC_MIN_ALIGNMENT)
{
/// Resize malloc'd memory region with no special alignment requirement.
auto trace_free = CurrentMemoryTracker::free(old_size);
auto trace_alloc = CurrentMemoryTracker::alloc(new_size);
trace_free.onFree(buf, old_size);
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
{
throw DB::ErrnoException(
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY,
"Allocator: Cannot realloc from {} to {}",
ReadableSize(old_size),
ReadableSize(new_size));
}
buf = new_buf;
trace_alloc.onAlloc(buf, new_size);
if constexpr (clear_memory)
if (new_size > old_size)
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
}
else
{
/// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
void * new_buf = alloc(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
free(buf, old_size);
buf = new_buf;
}
if constexpr (populate)
prefaultPages(buf, new_size);
return buf;
}
template class Allocator<false, false>;
template class Allocator<true, false>;
template class Allocator<false, true>;

View File

@ -8,47 +8,19 @@
#define ALLOCATOR_ASLR 1
#endif
#include <pcg_random.hpp>
#include <Common/thread_local_rng.h>
#if !defined(OS_DARWIN) && !defined(OS_FREEBSD)
#include <malloc.h>
#endif
#include <cstdlib>
#include <algorithm>
#include <sys/mman.h>
#include <Core/Defines.h>
#include <base/getPageSize.h>
#include <Common/CurrentMemoryTracker.h>
#include <Common/CurrentMetrics.h>
#include <Common/Exception.h>
#include <Common/formatReadable.h>
#include <Common/Allocator_fwd.h>
#include <base/errnoToString.h>
#include <Poco/Logger.h>
#include <Common/logger_useful.h>
#include <cstdlib>
extern const size_t POPULATE_THRESHOLD;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int LOGICAL_ERROR;
}
}
/** Previously there was a code which tried to use manual mmap and mremap (clickhouse_mremap.h) for large allocations/reallocations (64MB+).
* Most modern allocators (including jemalloc) don't use mremap, so the idea was to take advantage from mremap system call for large reallocs.
* Actually jemalloc had support for mremap, but it was intentionally removed from codebase https://github.com/jemalloc/jemalloc/commit/e2deab7a751c8080c2b2cdcfd7b11887332be1bb.
@ -69,83 +41,16 @@ class Allocator
{
public:
/// Allocate memory range.
void * alloc(size_t size, size_t alignment = 0)
{
checkSize(size);
auto trace = CurrentMemoryTracker::alloc(size);
void * ptr = allocNoTrack(size, alignment);
trace.onAlloc(ptr, size);
return ptr;
}
void * alloc(size_t size, size_t alignment = 0);
/// Free memory range.
void free(void * buf, size_t size)
{
try
{
checkSize(size);
freeNoTrack(buf);
auto trace = CurrentMemoryTracker::free(size);
trace.onFree(buf, size);
}
catch (...)
{
DB::tryLogCurrentException("Allocator::free");
throw;
}
}
void free(void * buf, size_t size);
/** Enlarge memory range.
* Data from old range is moved to the beginning of new range.
* Address of memory range could change.
*/
void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0)
{
checkSize(new_size);
if (old_size == new_size)
{
/// nothing to do.
/// BTW, it's not possible to change alignment while doing realloc.
}
else if (alignment <= MALLOC_MIN_ALIGNMENT)
{
/// Resize malloc'd memory region with no special alignment requirement.
auto trace_free = CurrentMemoryTracker::free(old_size);
auto trace_alloc = CurrentMemoryTracker::alloc(new_size);
trace_free.onFree(buf, old_size);
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
{
throw DB::ErrnoException(
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY,
"Allocator: Cannot realloc from {} to {}",
ReadableSize(old_size),
ReadableSize(new_size));
}
buf = new_buf;
trace_alloc.onAlloc(buf, new_size);
if constexpr (clear_memory)
if (new_size > old_size)
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
}
else
{
/// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
void * new_buf = alloc(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
free(buf, old_size);
buf = new_buf;
}
if constexpr (populate)
prefaultPages(buf, new_size);
return buf;
}
void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0);
protected:
static constexpr size_t getStackThreshold()
@ -156,76 +61,6 @@ protected:
static constexpr bool clear_memory = clear_memory_;
private:
void * allocNoTrack(size_t size, size_t alignment)
{
void * buf;
if (alignment <= MALLOC_MIN_ALIGNMENT)
{
if constexpr (clear_memory)
buf = ::calloc(size, 1);
else
buf = ::malloc(size);
if (nullptr == buf)
throw DB::ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot malloc {}.", ReadableSize(size));
}
else
{
buf = nullptr;
int res = posix_memalign(&buf, alignment, size);
if (0 != res)
throw DB::ErrnoException(
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Cannot allocate memory (posix_memalign) {}.", ReadableSize(size));
if constexpr (clear_memory)
memset(buf, 0, size);
}
if constexpr (populate)
prefaultPages(buf, size);
return buf;
}
void freeNoTrack(void * buf)
{
::free(buf);
}
void checkSize(size_t size)
{
/// More obvious exception in case of possible overflow (instead of just "Cannot mmap").
if (size >= 0x8000000000000000ULL)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size);
}
/// Address passed to madvise is required to be aligned to the page boundary.
auto adjustToPageSize(void * buf, size_t len, size_t page_size)
{
const uintptr_t address_numeric = reinterpret_cast<uintptr_t>(buf);
const size_t next_page_start = ((address_numeric + page_size - 1) / page_size) * page_size;
return std::make_pair(reinterpret_cast<void *>(next_page_start), len - (next_page_start - address_numeric));
}
void prefaultPages([[maybe_unused]] void * buf_, [[maybe_unused]] size_t len_)
{
#if defined(MADV_POPULATE_WRITE)
if (len_ < POPULATE_THRESHOLD)
return;
static const size_t page_size = ::getPageSize();
if (len_ < page_size) /// Rounded address should be still within [buf, buf + len).
return;
auto [buf, len] = adjustToPageSize(buf_, len_, page_size);
if (auto res = ::madvise(buf, len, MADV_POPULATE_WRITE); res < 0)
LOG_TRACE(
LogFrequencyLimiter(&Poco::Logger::get("Allocator"), 1),
"Attempt to populate pages failed: {} (EINVAL is expected for kernels < 5.14)",
errnoToString(res));
#endif
}
};

View File

@ -8,6 +8,7 @@
#include <Common/Allocator.h>
#include <Common/ProfileEvents.h>
#include <Common/memcpySmall.h>
#include <base/getPageSize.h>
#if __has_include(<sanitizer/asan_interface.h>) && defined(ADDRESS_SANITIZER)
# include <sanitizer/asan_interface.h>

View File

@ -1,5 +1,6 @@
#pragma once
#include <mutex>
#include <Core/Defines.h>
#if __has_include(<sanitizer/asan_interface.h>) && defined(ADDRESS_SANITIZER)
# include <sanitizer/asan_interface.h>

View File

@ -19,11 +19,6 @@
#include <Common/randomSeed.h>
#include <Common/formatReadable.h>
/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
namespace DB
{

View File

@ -1,3 +1,4 @@
#include <Common/formatReadable.h>
#include <Common/AsynchronousMetrics.h>
#include <Common/Exception.h>
#include <Common/setThreadName.h>
@ -8,6 +9,8 @@
#include <IO/MMappedFileCache.h>
#include <IO/ReadHelpers.h>
#include <base/errnoToString.h>
#include <base/getPageSize.h>
#include <sys/resource.h>
#include <chrono>
#include "config.h"
@ -655,6 +658,19 @@ void AsynchronousMetrics::update(TimePoint update_time)
total_memory_tracker.setRSS(rss, free_memory_in_allocator_arenas);
}
}
{
struct rusage rusage{};
if (!getrusage(RUSAGE_SELF, &rusage))
{
new_values["MemoryResidentMax"] = { rusage.ru_maxrss * 1024 /* KiB -> bytes */,
"Maximum amount of physical memory used by the server process, in bytes." };
}
else
{
LOG_ERROR(log, "Cannot obtain resource usage: {}", errnoToString(errno));
}
}
#endif
#if defined(OS_LINUX)

View File

@ -1,5 +1,6 @@
#pragma once
#include <algorithm>
#include <cassert>
#include <concepts>
#include <cstddef>

View File

@ -0,0 +1,144 @@
#include <Common/CalendarTimeInterval.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
CalendarTimeInterval::CalendarTimeInterval(const CalendarTimeInterval::Intervals & intervals)
{
for (auto [kind, val] : intervals)
{
switch (kind.kind)
{
case IntervalKind::Nanosecond:
case IntervalKind::Microsecond:
case IntervalKind::Millisecond:
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sub-second intervals are not supported here");
case IntervalKind::Second:
case IntervalKind::Minute:
case IntervalKind::Hour:
case IntervalKind::Day:
case IntervalKind::Week:
seconds += val * kind.toAvgSeconds();
break;
case IntervalKind::Month:
months += val;
break;
case IntervalKind::Quarter:
months += val * 3;
break;
case IntervalKind::Year:
months += val * 12;
break;
}
}
}
CalendarTimeInterval::Intervals CalendarTimeInterval::toIntervals() const
{
Intervals res;
auto greedy = [&](UInt64 x, std::initializer_list<std::pair<IntervalKind, UInt64>> kinds)
{
for (auto [kind, count] : kinds)
{
UInt64 k = x / count;
if (k == 0)
continue;
x -= k * count;
res.emplace_back(kind, k);
}
chassert(x == 0);
};
greedy(months, {{IntervalKind::Year, 12}, {IntervalKind::Month, 1}});
greedy(seconds, {{IntervalKind::Week, 3600*24*7}, {IntervalKind::Day, 3600*24}, {IntervalKind::Hour, 3600}, {IntervalKind::Minute, 60}, {IntervalKind::Second, 1}});
return res;
}
UInt64 CalendarTimeInterval::minSeconds() const
{
return 3600*24 * (months/12 * 365 + months%12 * 28) + seconds;
}
UInt64 CalendarTimeInterval::maxSeconds() const
{
return 3600*24 * (months/12 * 366 + months%12 * 31) + seconds;
}
void CalendarTimeInterval::assertSingleUnit() const
{
if (seconds && months)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interval shouldn't contain both calendar units and clock units (e.g. months and days)");
}
void CalendarTimeInterval::assertPositive() const
{
if (!seconds && !months)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interval must be positive");
}
/// Number of whole months between 1970-01-01 and `t`.
static Int64 toAbsoluteMonth(std::chrono::system_clock::time_point t)
{
std::chrono::year_month_day ymd(std::chrono::floor<std::chrono::days>(t));
return (Int64(int(ymd.year())) - 1970) * 12 + Int64(unsigned(ymd.month()) - 1);
}
static std::chrono::sys_seconds startOfAbsoluteMonth(Int64 absolute_month)
{
Int64 year = absolute_month >= 0 ? absolute_month/12 : -((-absolute_month+11)/12);
Int64 month = absolute_month - year*12;
chassert(month >= 0 && month < 12);
std::chrono::year_month_day ymd(
std::chrono::year(int(year + 1970)),
std::chrono::month(unsigned(month + 1)),
std::chrono::day(1));
return std::chrono::sys_days(ymd);
}
std::chrono::sys_seconds CalendarTimeInterval::advance(std::chrono::system_clock::time_point tp) const
{
auto t = std::chrono::sys_seconds(std::chrono::floor<std::chrono::seconds>(tp));
if (months)
{
auto m = toAbsoluteMonth(t);
auto s = t - startOfAbsoluteMonth(m);
t = startOfAbsoluteMonth(m + Int64(months)) + s;
}
return t + std::chrono::seconds(Int64(seconds));
}
std::chrono::sys_seconds CalendarTimeInterval::floor(std::chrono::system_clock::time_point tp) const
{
assertSingleUnit();
assertPositive();
if (months)
return startOfAbsoluteMonth(toAbsoluteMonth(tp) / months * months);
else
{
constexpr std::chrono::seconds epoch(-3600*24*3);
auto t = std::chrono::sys_seconds(std::chrono::floor<std::chrono::seconds>(tp));
/// We want to align with weeks, but 1970-01-01 is a Thursday, so align with 1969-12-29 instead.
return std::chrono::sys_seconds((t.time_since_epoch() - epoch) / seconds * seconds + epoch);
}
}
bool CalendarTimeInterval::operator==(const CalendarTimeInterval & rhs) const
{
return std::tie(months, seconds) == std::tie(rhs.months, rhs.seconds);
}
bool CalendarTimeInterval::operator!=(const CalendarTimeInterval & rhs) const
{
return !(*this == rhs);
}
}

View File

@ -0,0 +1,63 @@
#pragma once
#include <Common/IntervalKind.h>
#include <chrono>
namespace DB
{
/// Represents a duration of calendar time, e.g.:
/// * 2 weeks + 5 minutes + and 21 seconds (aka 605121 seconds),
/// * 1 (calendar) month - not equivalent to any number of seconds!
/// * 3 years + 2 weeks (aka 36 months + 604800 seconds).
///
/// Be careful with calendar arithmetic: it's missing many familiar properties of numbers.
/// E.g. x + y - y is not always equal to x (October 31 + 1 month - 1 month = November 1).
struct CalendarTimeInterval
{
UInt64 seconds = 0;
UInt64 months = 0;
using Intervals = std::vector<std::pair<IntervalKind, UInt64>>;
CalendarTimeInterval() = default;
/// Year, Quarter, Month are converted to months.
/// Week, Day, Hour, Minute, Second are converted to seconds.
/// Millisecond, Microsecond, Nanosecond throw exception.
explicit CalendarTimeInterval(const Intervals & intervals);
/// E.g. for {36 months, 604801 seconds} returns {3 years, 2 weeks, 1 second}.
Intervals toIntervals() const;
/// Approximate shortest and longest duration in seconds. E.g. a month is [28, 31] days.
UInt64 minSeconds() const;
UInt64 maxSeconds() const;
/// Checks that the interval has only months or only seconds, throws otherwise.
void assertSingleUnit() const;
void assertPositive() const;
/// Add this interval to the timestamp. First months, then seconds.
/// Gets weird near month boundaries: October 31 + 1 month = December 1.
std::chrono::sys_seconds advance(std::chrono::system_clock::time_point t) const;
/// Rounds the timestamp down to the nearest timestamp "aligned" with this interval.
/// The interval must satisfy assertSingleUnit() and assertPositive().
/// * For months, rounds to the start of a month whose abosolute index is divisible by `months`.
/// The month index is 0-based starting from January 1970.
/// E.g. if the interval is 1 month, rounds down to the start of the month.
/// * For seconds, rounds to a timestamp x such that (x - December 29 1969 (Monday)) is divisible
/// by this interval.
/// E.g. if the interval is 1 week, rounds down to the start of the week (Monday).
///
/// Guarantees:
/// * advance(floor(x)) > x
/// * floor(advance(floor(x))) = advance(floor(x))
std::chrono::sys_seconds floor(std::chrono::system_clock::time_point t) const;
bool operator==(const CalendarTimeInterval & rhs) const;
bool operator!=(const CalendarTimeInterval & rhs) const;
};
}

View File

@ -31,6 +31,17 @@ public:
using HashMethodContextPtr = std::shared_ptr<HashMethodContext>;
struct LastElementCacheStats
{
UInt64 hits = 0;
UInt64 misses = 0;
void update(size_t num_tries, size_t num_misses)
{
hits += num_tries - num_misses;
misses += num_misses;
}
};
namespace columns_hashing_impl
{
@ -39,14 +50,19 @@ template <typename Value, bool consecutive_keys_optimization_>
struct LastElementCache
{
static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_;
Value value;
bool empty = true;
bool found = false;
UInt64 misses = 0;
bool check(const Value & value_) { return !empty && value == value_; }
bool check(const Value & value_) const { return value == value_; }
template <typename Key>
bool check(const Key & key) { return !empty && value.first == key; }
bool check(const Key & key) const { return value.first == key; }
bool hasOnlyOneValue() const { return found && misses == 1; }
UInt64 getMisses() const { return misses; }
};
template <typename Data>
@ -166,6 +182,7 @@ public:
return EmplaceResult(!has_null_key);
}
}
auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
return emplaceImpl(key_holder, data);
}
@ -183,6 +200,7 @@ public:
return FindResult(data.hasNullKeyData(), 0);
}
}
auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
return findKeyImpl(keyHolderGetKey(key_holder), data);
}
@ -194,6 +212,30 @@ public:
return data.hash(keyHolderGetKey(key_holder));
}
ALWAYS_INLINE void resetCache()
{
if constexpr (consecutive_keys_optimization)
{
cache.empty = true;
cache.found = false;
cache.misses = 0;
}
}
ALWAYS_INLINE bool hasOnlyOneValueSinceLastReset() const
{
if constexpr (consecutive_keys_optimization)
return cache.hasOnlyOneValue();
return false;
}
ALWAYS_INLINE UInt64 getCacheMissesSinceLastReset() const
{
if constexpr (consecutive_keys_optimization)
return cache.getMisses();
return 0;
}
ALWAYS_INLINE bool isNullAt(size_t row) const
{
if constexpr (nullable)
@ -225,17 +267,15 @@ protected:
else
cache.value = Value();
}
if constexpr (nullable)
{
if constexpr (nullable)
null_map = &checkAndGetColumn<ColumnNullable>(column)->getNullMapColumn();
}
}
template <typename Data, typename KeyHolder>
ALWAYS_INLINE EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data)
{
if constexpr (Cache::consecutive_keys_optimization)
if constexpr (consecutive_keys_optimization)
{
if (cache.found && cache.check(keyHolderGetKey(key_holder)))
{
@ -266,6 +306,7 @@ protected:
{
cache.found = true;
cache.empty = false;
++cache.misses;
if constexpr (has_mapped)
{
@ -288,12 +329,12 @@ protected:
template <typename Data, typename Key>
ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data)
{
if constexpr (Cache::consecutive_keys_optimization)
if constexpr (consecutive_keys_optimization)
{
/// It's possible to support such combination, but code will became more complex.
/// Now there's not place where we need this options enabled together
static_assert(!FindResult::has_offset, "`consecutive_keys_optimization` and `has_offset` are conflicting options");
if (cache.check(key))
if (likely(!cache.empty) && cache.check(key))
{
if constexpr (has_mapped)
return FindResult(&cache.value.second, cache.found, 0);
@ -308,6 +349,7 @@ protected:
{
cache.found = it != nullptr;
cache.empty = false;
++cache.misses;
if constexpr (has_mapped)
{
@ -325,9 +367,8 @@ protected:
size_t offset = 0;
if constexpr (FindResult::has_offset)
{
offset = it ? data.offsetInternal(it) : 0;
}
if constexpr (has_mapped)
return FindResult(it ? &it->getMapped() : nullptr, it != nullptr, offset);
else

View File

@ -253,6 +253,8 @@
M(MergeTreeAllRangesAnnouncementsSent, "The current number of announcement being sent in flight from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.") \
M(CreatedTimersInQueryProfiler, "Number of Created thread local timers in QueryProfiler") \
M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler") \
M(RefreshableViews, "Number materialized views with periodic refreshing (REFRESH)") \
M(RefreshingViews, "Number of materialized views currently executing a refresh") \
#ifdef APPLY_FOR_EXTERNAL_METRICS
#define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M)

View File

@ -93,15 +93,6 @@ public:
return Exception(msg, code, remote_);
}
/// Message must be a compile-time constant
template <typename T>
requires std::is_convertible_v<T, String>
Exception(int code, T && message) : Exception(message, code)
{
capture_thread_frame_pointers = thread_frame_pointers;
message_format_string = tryGetStaticFormatString(message);
}
/// These creators are for messages that were received by network or generated by a third-party library in runtime.
/// Please use a constructor for all other cases.
static Exception createRuntime(int code, const String & message) { return Exception(message, code); }

Some files were not shown because too many files have changed in this diff Show More