Merge branch 'master' into rocksdb_bypass_memtable

Try fix CI
This commit is contained in:
Duc Canh Le 2024-05-01 11:21:31 +00:00
commit 6a324ae67a
507 changed files with 12723 additions and 5388 deletions

View File

@ -8,4 +8,4 @@ jobs:
DebugInfo:
runs-on: ubuntu-latest
steps:
- uses: hmarr/debug-action@a701ed95a46e6f2fb0df25e1a558c16356fae35a
- uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6

View File

@ -16,7 +16,7 @@ jobs:
data: ${{ steps.runconfig.outputs.CI_DATA }}
steps:
- name: DebugInfo
uses: hmarr/debug-action@a701ed95a46e6f2fb0df25e1a558c16356fae35a
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
@ -59,16 +59,17 @@ jobs:
uses: ./.github/workflows/reusable_docker.yml
with:
data: ${{ needs.RunConfig.outputs.data }}
StyleCheck:
needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Style check
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 style_check.py --no-push
# Tested in MQ
# StyleCheck:
# needs: [RunConfig, BuildDockers]
# if: ${{ !failure() && !cancelled() }}
# uses: ./.github/workflows/reusable_test.yml
# with:
# test_name: Style check
# runner_type: style-checker
# data: ${{ needs.RunConfig.outputs.data }}
# run_command: |
# python3 style_check.py --no-push
CompatibilityCheckX86:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
@ -447,6 +448,14 @@ jobs:
test_name: Stateless tests (debug)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestAsanAzure:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (azure, asan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
############################ FUNCTIONAl STATEFUL TESTS #######################################
##############################################################################################
@ -597,6 +606,14 @@ jobs:
test_name: Stress test (tsan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
StressTestTsanAzure:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stress test (azure, tsan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
StressTestMsan:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}

View File

@ -22,7 +22,7 @@ jobs:
data: ${{ steps.runconfig.outputs.CI_DATA }}
steps:
- name: DebugInfo
uses: hmarr/debug-action@a701ed95a46e6f2fb0df25e1a558c16356fae35a
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
@ -158,7 +158,7 @@ jobs:
#
FinishCheck:
if: ${{ !failure() && !cancelled() }}
needs: [Tests_1, Tests_2, Builds_1_Report, Builds_2_Report]
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
@ -171,7 +171,7 @@ jobs:
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 finish_check.py ${{ (contains(needs.*.result, 'failure') && github.event_name == 'merge_group') && '--pipeline-failure' || '' }}
- name: Auto merge if approved
if: ${{ github.event_name != 'merge_group' }}
run: |

View File

@ -63,7 +63,7 @@ jobs:
GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}
steps:
- name: DebugInfo
uses: hmarr/debug-action@a701ed95a46e6f2fb0df25e1a558c16356fae35a
uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:

View File

@ -16,7 +16,7 @@
#ci_set_reduced
#ci_set_arm
#ci_set_integration
#ci_set_analyzer
#ci_set_old_analyzer
## To run specified job in CI:
#job_<JOB NAME>

View File

@ -1,4 +1,5 @@
### Table of Contents
**[ClickHouse release v24.4, 2024-04-30](#244)**<br/>
**[ClickHouse release v24.3 LTS, 2024-03-27](#243)**<br/>
**[ClickHouse release v24.2, 2024-02-29](#242)**<br/>
**[ClickHouse release v24.1, 2024-01-30](#241)**<br/>
@ -6,6 +7,168 @@
# 2024 Changelog
### <a id="244"></a> ClickHouse release 24.4, 2024-04-30
#### Upgrade Notes
* `clickhouse-odbc-bridge` and `clickhouse-library-bridge` are now separate packages. This closes [#61677](https://github.com/ClickHouse/ClickHouse/issues/61677). [#62114](https://github.com/ClickHouse/ClickHouse/pull/62114) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Don't allow setting `max_parallel_replicas` (for the experimental parallel reading from replicas) to `0`, as it doesn't make sense. Closes [#60140](https://github.com/ClickHouse/ClickHouse/issues/60140). [#61201](https://github.com/ClickHouse/ClickHouse/pull/61201) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove support for `INSERT WATCH` query (part of the deprecated `LIVE VIEW` feature). [#62382](https://github.com/ClickHouse/ClickHouse/pull/62382) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Removed the `optimize_monotonous_functions_in_order_by` setting. [#63004](https://github.com/ClickHouse/ClickHouse/pull/63004) ([Raúl Marín](https://github.com/Algunenano)).
* Remove experimental tag from the `Replicated` database engine. Now it is in Beta stage. [#62937](https://github.com/ClickHouse/ClickHouse/pull/62937) ([Justin de Guzman](https://github.com/justindeguzman)).
#### New Feature
* Support recursive CTEs. [#62074](https://github.com/ClickHouse/ClickHouse/pull/62074) ([Maksim Kita](https://github.com/kitaisreal)).
* Support `QUALIFY` clause. Closes [#47819](https://github.com/ClickHouse/ClickHouse/issues/47819). [#62619](https://github.com/ClickHouse/ClickHouse/pull/62619) ([Maksim Kita](https://github.com/kitaisreal)).
* Table engines are now grantable, and this does not affect existing users' behavior. [#60117](https://github.com/ClickHouse/ClickHouse/pull/60117) ([jsc0218](https://github.com/jsc0218)).
* Added a rewritable S3 disk which supports INSERT operations and does not require locally stored metadata. [#61116](https://github.com/ClickHouse/ClickHouse/pull/61116) ([Julia Kartseva](https://github.com/jkartseva)). The main use case is for system tables.
* The syntax highlighting while typing in the client will work on the syntax level (previously, it worked on the lexer level). [#62123](https://github.com/ClickHouse/ClickHouse/pull/62123) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support dropping multiple tables at the same time, e.g. `DROP TABLE a, b, c` (see the sketch after this list). [#58705](https://github.com/ClickHouse/ClickHouse/pull/58705) ([zhongyuankai](https://github.com/zhongyuankai)).
* Modifying memory table settings through `ALTER MODIFY SETTING` is now supported. Example: `ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000;`. [#62039](https://github.com/ClickHouse/ClickHouse/pull/62039) ([zhongyuankai](https://github.com/zhongyuankai)).
* Added `role` query parameter to the HTTP interface. It works similarly to `SET ROLE x`, applying the role before the statement is executed. This allows for overcoming the limitation of the HTTP interface, as multiple statements are not allowed, and it is not possible to send both `SET ROLE x` and the statement itself at the same time. It is possible to set multiple roles that way, e.g., `?role=x&role=y`, which will be an equivalent of `SET ROLE x, y`. [#62669](https://github.com/ClickHouse/ClickHouse/pull/62669) ([Serge Klochkov](https://github.com/slvrtrn)).
* Add `SYSTEM UNLOAD PRIMARY KEY` to free up memory usage for a table's primary key. [#62738](https://github.com/ClickHouse/ClickHouse/pull/62738) ([Pablo Marcos](https://github.com/pamarcos)).
* Added `value1`, `value2`, ..., `value10` columns to `system.text_log`. These columns contain values that were used to format the message. [#59619](https://github.com/ClickHouse/ClickHouse/pull/59619) ([Alexey Katsman](https://github.com/alexkats)).
* Added persistent virtual column `_block_offset`, which stores the original row number within the block assigned at insert time. Persistence of `_block_offset` can be enabled by the MergeTree setting `enable_block_offset_column`. Added virtual column `_part_data_version`, which contains either the minimum block number or the mutation version of the part. The persistent virtual column `_block_number` is no longer considered experimental. [#60676](https://github.com/ClickHouse/ClickHouse/pull/60676) ([Anton Popov](https://github.com/CurtizJ)).
* Add a setting `input_format_json_throw_on_bad_escape_sequence`; disabling it allows bad escape sequences to be kept as-is in JSON input formats. [#61889](https://github.com/ClickHouse/ClickHouse/pull/61889) ([Kruglov Pavel](https://github.com/Avogar)).
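To make the new SQL-level features above concrete, here is a minimal sketch (not taken from the linked PRs; table and column names such as `events`, `user_id`, `event_time` are hypothetical):

```sql
-- QUALIFY filters on window-function results, analogous to HAVING for aggregates.
SELECT
    user_id,
    event_time,
    row_number() OVER (PARTITION BY user_id ORDER BY event_time DESC) AS rn
FROM events
QUALIFY rn = 1;

-- Several tables can now be dropped in one statement.
DROP TABLE a, b, c;

-- Free the memory used by a table's primary key.
SYSTEM UNLOAD PRIMARY KEY events;
```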
#### Performance Improvement
* Improved JOIN filter push-down using equivalent sets. [#61216](https://github.com/ClickHouse/ClickHouse/pull/61216) ([Maksim Kita](https://github.com/kitaisreal)).
* Convert OUTER JOIN to INNER JOIN optimization if the filter after JOIN always filters default values. Optimization can be controlled with setting `query_plan_convert_outer_join_to_inner_join`, enabled by default. [#62907](https://github.com/ClickHouse/ClickHouse/pull/62907) ([Maksim Kita](https://github.com/kitaisreal)).
* Enabled the fast Parquet encoder by default (`output_format_parquet_use_custom_encoder`). [#62088](https://github.com/ClickHouse/ClickHouse/pull/62088) ([Michael Kolupaev](https://github.com/al13n321)).
* Improvement for AWS S3: the client sends a 'Keep-Alive: timeout=X' header to the server; if the client receives a response from the server with that header, the client uses the value provided by the server. It is also better for the client not to reuse a connection that is nearly expired, to avoid a connection-close race. [#62249](https://github.com/ClickHouse/ClickHouse/pull/62249) ([Sema Checherinda](https://github.com/CheSema)).
* Reduce overhead of the mutations for SELECTs (v2). [#60856](https://github.com/ClickHouse/ClickHouse/pull/60856) ([Azat Khuzhin](https://github.com/azat)).
* More frequently invoked functions in PODArray are now force-inlined. [#61144](https://github.com/ClickHouse/ClickHouse/pull/61144) ([李扬](https://github.com/taiyang-li)).
* Speed up parsing of JSON by skipping the rest of the object when all required columns are read. [#62210](https://github.com/ClickHouse/ClickHouse/pull/62210) ([lgbo](https://github.com/lgbo-ustc)).
* Improve trivial INSERT SELECT from files in the file/s3/hdfs/url/... table functions. Add a separate `max_parsing_threads` setting to control the number of threads used for parallel parsing. [#62404](https://github.com/ClickHouse/ClickHouse/pull/62404) ([Kruglov Pavel](https://github.com/Avogar)).
* Functions `to_utc_timestamp` and `from_utc_timestamp` are now about 2x faster. [#62583](https://github.com/ClickHouse/ClickHouse/pull/62583) ([KevinyhZou](https://github.com/KevinyhZou)).
* Functions `parseDateTimeOrNull`, `parseDateTimeOrZero`, `parseDateTimeInJodaSyntaxOrNull` and `parseDateTimeInJodaSyntaxOrZero` now run significantly faster (10x - 1000x) when the input contains mostly non-parseable values. [#62634](https://github.com/ClickHouse/ClickHouse/pull/62634) ([LiuNeng](https://github.com/liuneng1994)).
* SELECTs against `system.query_cache` are now noticeably faster when the query cache contains lots of entries (e.g. more than 100,000). [#62671](https://github.com/ClickHouse/ClickHouse/pull/62671) ([Robert Schulze](https://github.com/rschu1ze)).
* Less contention in filesystem cache (part 3): execute removal from filesystem without lock on space reservation attempt. [#61163](https://github.com/ClickHouse/ClickHouse/pull/61163) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Speed up dynamic resize of filesystem cache. [#61723](https://github.com/ClickHouse/ClickHouse/pull/61723) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Dictionary source with `INVALIDATE_QUERY` is not reloaded twice on startup. [#62050](https://github.com/ClickHouse/ClickHouse/pull/62050) ([vdimir](https://github.com/vdimir)).
* Fix an issue where the primary index was not used when a redundant `= 1` or `= 0` was added after a boolean expression involving the primary key. For example, both `SELECT * FROM <table> WHERE <primary-key> IN (<value>) = 1` and `SELECT * FROM <table> WHERE <primary-key> NOT IN (<value>) = 0` previously performed a full table scan even though the primary index could be used (see the sketch after this list). [#62142](https://github.com/ClickHouse/ClickHouse/pull/62142) ([josh-hildred](https://github.com/josh-hildred)).
* Return stream of chunks from `system.remote_data_paths` instead of accumulating the whole result in one big chunk. This allows to consume less memory, show intermediate progress and cancel the query. [#62613](https://github.com/ClickHouse/ClickHouse/pull/62613) ([Alexander Gololobov](https://github.com/davenger)).
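A minimal sketch of the redundant `= 1` / `= 0` case mentioned above (hypothetical table; with the fix, both queries can use the primary index instead of scanning the full table):

```sql
CREATE TABLE hits (id UInt64, url String) ENGINE = MergeTree ORDER BY id;

SELECT count() FROM hits WHERE (id IN (1, 2, 3)) = 1;     -- previously a full scan
SELECT count() FROM hits WHERE (id NOT IN (1, 2, 3)) = 0; -- previously a full scan
```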
#### Experimental Feature
* Support parallel write buffer for Azure Blob Storage managed by setting `azure_allow_parallel_part_upload`. [#62534](https://github.com/ClickHouse/ClickHouse/pull/62534) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Userspace page cache works with static web storage (`disk(type = web)`) now. Use client setting `use_page_cache_for_disks_without_file_cache=1` to enable. [#61911](https://github.com/ClickHouse/ClickHouse/pull/61911) ([Michael Kolupaev](https://github.com/al13n321)).
* Don't treat Bool and number variants as suspicious in the `Variant` type. [#61999](https://github.com/ClickHouse/ClickHouse/pull/61999) ([Kruglov Pavel](https://github.com/Avogar)).
* Implement better conversion from String to `Variant` using parsing. [#62005](https://github.com/ClickHouse/ClickHouse/pull/62005) ([Kruglov Pavel](https://github.com/Avogar)).
* Support `Variant` in JSONExtract functions (see the sketch after this list). [#62014](https://github.com/ClickHouse/ClickHouse/pull/62014) ([Kruglov Pavel](https://github.com/Avogar)).
* Mark type `Variant` as comparable so it can be used in primary key. [#62693](https://github.com/ClickHouse/ClickHouse/pull/62693) ([Kruglov Pavel](https://github.com/Avogar)).
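A minimal sketch of the `Variant` improvements listed above (assuming the experimental `Variant` type is enabled via the setting shown; exact behavior may differ):

```sql
SET allow_experimental_variant_type = 1;

-- String-to-Variant conversion now uses parsing to pick the best-matching variant.
SELECT CAST('42', 'Variant(String, UInt64)') AS v, toTypeName(v);

-- JSONExtract can now return a Variant.
SELECT JSONExtract('{"a": "hello"}', 'a', 'Variant(String, UInt64)');
```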
#### Improvement
* For convenience, `SELECT * FROM numbers()` will work in the same way as `SELECT * FROM system.numbers` - without a limit. [#61969](https://github.com/ClickHouse/ClickHouse/pull/61969) ([YenchangChan](https://github.com/YenchangChan)).
* Introduce separate consumer/producer tags for the Kafka configuration. This avoids warnings from librdkafka (a bad C library with a lot of bugs) that consumer properties were specified for producer instances and vice versa (e.g. `Configuration property session.timeout.ms is a consumer property and will be ignored by this producer instance`). Closes: [#58983](https://github.com/ClickHouse/ClickHouse/issues/58983). [#58956](https://github.com/ClickHouse/ClickHouse/pull/58956) ([Aleksandr Musorin](https://github.com/AVMusorin)).
* Functions `date_diff` and `age` now calculate their result at nanosecond instead of microsecond precision. They now also offer `nanosecond` (or `nanoseconds` or `ns`) as a possible value for the `unit` parameter. [#61409](https://github.com/ClickHouse/ClickHouse/pull/61409) ([Austin Kothig](https://github.com/kothiga)).
* Added nanosecond, microsecond, and millisecond units for `date_trunc` (see the sketch after this list). [#62335](https://github.com/ClickHouse/ClickHouse/pull/62335) ([Misz606](https://github.com/Misz606)).
* Reload certificate chain during certificate reload. [#61671](https://github.com/ClickHouse/ClickHouse/pull/61671) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)).
* Try to prevent an error [#60432](https://github.com/ClickHouse/ClickHouse/issues/60432) by not allowing a table to be attached if there is an active replica for that replica path. [#61876](https://github.com/ClickHouse/ClickHouse/pull/61876) ([Arthur Passos](https://github.com/arthurpassos)).
* Implement support for `input` for `clickhouse-local`. [#61923](https://github.com/ClickHouse/ClickHouse/pull/61923) ([Azat Khuzhin](https://github.com/azat)).
* `Join` table engine with strictness `ANY` is consistent after reload. When several rows with the same key are inserted, the first one now has higher priority (before, one was chosen randomly upon table loading). Closes [#51027](https://github.com/ClickHouse/ClickHouse/issues/51027). [#61972](https://github.com/ClickHouse/ClickHouse/pull/61972) ([vdimir](https://github.com/vdimir)).
* Automatically infer Nullable column types from Apache Arrow schema. [#61984](https://github.com/ClickHouse/ClickHouse/pull/61984) ([Maksim Kita](https://github.com/kitaisreal)).
* Allow to cancel parallel merge of aggregate states during aggregation. Example: `uniqExact`. [#61992](https://github.com/ClickHouse/ClickHouse/pull/61992) ([Maksim Kita](https://github.com/kitaisreal)).
* Use `system.keywords` to fill in the suggestions and also use them in all places internally. [#62000](https://github.com/ClickHouse/ClickHouse/pull/62000) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* `OPTIMIZE FINAL` for `ReplicatedMergeTree` now will wait for currently active merges to finish and then reattempt to schedule a final merge. This will put it more in line with ordinary `MergeTree` behaviour. [#62067](https://github.com/ClickHouse/ClickHouse/pull/62067) ([Nikita Taranov](https://github.com/nickitat)).
* When reading data from a Hive text file, the number of input fields was previously inferred from the first line of the file. If the first line had fewer fields than the Hive table definition (for example, a table defined as `test_tbl(a Int32, b Int32, c Int32)` but a first line with only 2 fields), the input was resized to 2 fields, and for subsequent lines with 3 fields the third field could not be read and was set to the default value 0, which is wrong. This is now fixed. [#62086](https://github.com/ClickHouse/ClickHouse/pull/62086) ([KevinyhZou](https://github.com/KevinyhZou)).
* `CREATE AS` copies the table's comment. [#62117](https://github.com/ClickHouse/ClickHouse/pull/62117) ([Pablo Marcos](https://github.com/pamarcos)).
* Add query progress for reads from the `system.zookeeper` table. [#62152](https://github.com/ClickHouse/ClickHouse/pull/62152) ([JackyWoo](https://github.com/JackyWoo)).
* Add ability to turn on trace collector (Real and CPU) server-wide. [#62189](https://github.com/ClickHouse/ClickHouse/pull/62189) ([alesapin](https://github.com/alesapin)).
* Added setting `lightweight_deletes_sync` (default value: 2, meaning wait for all replicas synchronously). It is similar to the `mutations_sync` setting but affects only the behaviour of lightweight deletes (see the sketch after this list). [#62195](https://github.com/ClickHouse/ClickHouse/pull/62195) ([Anton Popov](https://github.com/CurtizJ)).
* Distinguish booleans and integers while parsing values for custom settings: `SET custom_a = true; SET custom_b = 1;`. [#62206](https://github.com/ClickHouse/ClickHouse/pull/62206) ([Vitaly Baranov](https://github.com/vitlibar)).
* Support S3 access through AWS Private Link Interface endpoints. Closes [#60021](https://github.com/ClickHouse/ClickHouse/issues/60021), [#31074](https://github.com/ClickHouse/ClickHouse/issues/31074) and [#53761](https://github.com/ClickHouse/ClickHouse/issues/53761). [#62208](https://github.com/ClickHouse/ClickHouse/pull/62208) ([Arthur Passos](https://github.com/arthurpassos)).
* Do not create a directory for UDF in clickhouse-client if it does not exist. This closes [#59597](https://github.com/ClickHouse/ClickHouse/issues/59597). [#62366](https://github.com/ClickHouse/ClickHouse/pull/62366) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The query cache now no longer caches results of queries against system tables (`system.*`, `information_schema.*`, `INFORMATION_SCHEMA.*`). [#62376](https://github.com/ClickHouse/ClickHouse/pull/62376) ([Robert Schulze](https://github.com/rschu1ze)).
* `MOVE PARTITION TO TABLE` query can be delayed or can throw a `TOO_MANY_PARTS` exception to avoid exceeding limits on the part count. The same settings and limits are applied as for the `INSERT` query (see `max_parts_in_total`, `parts_to_delay_insert`, `parts_to_throw_insert`, `inactive_parts_to_throw_insert`, `inactive_parts_to_delay_insert`, `max_avg_part_size_for_too_many_parts`, `min_delay_to_insert_ms` and `max_delay_to_insert` settings). [#62420](https://github.com/ClickHouse/ClickHouse/pull/62420) ([Sergei Trifonov](https://github.com/serxa)).
* Changed the default installation directory on macOS from `/usr/bin` to `/usr/local/bin`. This is necessary because Apple's System Integrity Protection introduced with macOS El Capitan (2015) prevents writing into `/usr/bin`, even with `sudo`. [#62489](https://github.com/ClickHouse/ClickHouse/pull/62489) ([haohang](https://github.com/yokofly)).
* Make the `transform` function always return the first match. [#62518](https://github.com/ClickHouse/ClickHouse/pull/62518) ([Raúl Marín](https://github.com/Algunenano)).
* Added the missing `hostname` column to system table `blob_storage_log`. [#62456](https://github.com/ClickHouse/ClickHouse/pull/62456) ([Jayme Bird](https://github.com/jaymebrd)).
* For consistency with other system tables, `system.backup_log` now has a column `event_time`. [#62541](https://github.com/ClickHouse/ClickHouse/pull/62541) ([Jayme Bird](https://github.com/jaymebrd)).
* Table `system.backup_log` now has the "default" sorting key which is `event_date, event_time`, the same as for other `_log` table engines. [#62667](https://github.com/ClickHouse/ClickHouse/pull/62667) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Avoid evaluating table DEFAULT expressions while executing `RESTORE`. [#62601](https://github.com/ClickHouse/ClickHouse/pull/62601) ([Vitaly Baranov](https://github.com/vitlibar)).
* S3 storage and backups now use the same default keep-alive settings as the S3 disk. [#62648](https://github.com/ClickHouse/ClickHouse/pull/62648) ([Sema Checherinda](https://github.com/CheSema)).
* Add librdkafka's (that infamous C library, which has a lot of bugs) client identifier to log messages to be able to differentiate log messages from different consumers of a single table. [#62813](https://github.com/ClickHouse/ClickHouse/pull/62813) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Allow special macros `{uuid}` and `{database}` in a Replicated database ZooKeeper path. [#62818](https://github.com/ClickHouse/ClickHouse/pull/62818) ([Vitaly Baranov](https://github.com/vitlibar)).
* Allow quota key with different auth scheme in HTTP requests. [#62842](https://github.com/ClickHouse/ClickHouse/pull/62842) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Reduce the verbosity of command line argument `--help` in `clickhouse client` and `clickhouse local`. The previous output is now generated by `--help --verbose`. [#62973](https://github.com/ClickHouse/ClickHouse/pull/62973) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* `log_bin_use_v1_row_events` was removed in MySQL 8.3, and we adjust the experimental `MaterializedMySQL` engine for it [#60479](https://github.com/ClickHouse/ClickHouse/issues/60479). [#63101](https://github.com/ClickHouse/ClickHouse/pull/63101) ([Eugene Klimov](https://github.com/Slach)). Author: Nikolay Yankin.
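A minimal sketch of a few of the improvements above (hypothetical table name `events`; the sub-second unit names for `date_trunc` are assumed to follow the existing singular naming):

```sql
-- Millisecond truncation with date_trunc.
SELECT date_trunc('millisecond', toDateTime64('2024-04-30 12:34:56.789123', 6));

-- Lightweight delete without waiting for all replicas.
SET lightweight_deletes_sync = 0;
DELETE FROM events WHERE event_date < '2023-01-01';
```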
#### Build/Testing/Packaging Improvement
* Vendor in Rust dependencies, so the Rust code (that we use for minor features for hype and lulz) can be built in a sane way, similarly to C++. [#62297](https://github.com/ClickHouse/ClickHouse/pull/62297) ([Raúl Marín](https://github.com/Algunenano)).
* ClickHouse now uses OpenSSL 3.2 instead of BoringSSL. [#59870](https://github.com/ClickHouse/ClickHouse/pull/59870) ([Robert Schulze](https://github.com/rschu1ze)). Note that OpenSSL has generally worse engineering culture (such as non-zero number of sanitizer reports, that we had to patch, a complex build system with generated files, etc.) but has better compatibility.
* Ignore DROP queries in stress test with 1/2 probability, use TRUNCATE instead of ignoring DROP in upgrade check for Memory/JOIN tables. [#61476](https://github.com/ClickHouse/ClickHouse/pull/61476) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove from the Keeper Docker image the volumes at /etc/clickhouse-keeper and /var/log/clickhouse-keeper. [#61683](https://github.com/ClickHouse/ClickHouse/pull/61683) ([Tristan](https://github.com/Tristan971)).
* Add tests for all issues which are no longer relevant with Analyzer being enabled by default. Closes: [#55794](https://github.com/ClickHouse/ClickHouse/issues/55794) Closes: [#49472](https://github.com/ClickHouse/ClickHouse/issues/49472) Closes: [#44414](https://github.com/ClickHouse/ClickHouse/issues/44414) Closes: [#13843](https://github.com/ClickHouse/ClickHouse/issues/13843) Closes: [#55803](https://github.com/ClickHouse/ClickHouse/issues/55803) Closes: [#48308](https://github.com/ClickHouse/ClickHouse/issues/48308) Closes: [#45535](https://github.com/ClickHouse/ClickHouse/issues/45535) Closes: [#44365](https://github.com/ClickHouse/ClickHouse/issues/44365) Closes: [#44153](https://github.com/ClickHouse/ClickHouse/issues/44153) Closes: [#42399](https://github.com/ClickHouse/ClickHouse/issues/42399) Closes: [#27115](https://github.com/ClickHouse/ClickHouse/issues/27115) Closes: [#23162](https://github.com/ClickHouse/ClickHouse/issues/23162) Closes: [#15395](https://github.com/ClickHouse/ClickHouse/issues/15395) Closes: [#15411](https://github.com/ClickHouse/ClickHouse/issues/15411) Closes: [#14978](https://github.com/ClickHouse/ClickHouse/issues/14978) Closes: [#17319](https://github.com/ClickHouse/ClickHouse/issues/17319) Closes: [#11813](https://github.com/ClickHouse/ClickHouse/issues/11813) Closes: [#13210](https://github.com/ClickHouse/ClickHouse/issues/13210) Closes: [#23053](https://github.com/ClickHouse/ClickHouse/issues/23053) Closes: [#37729](https://github.com/ClickHouse/ClickHouse/issues/37729) Closes: [#32639](https://github.com/ClickHouse/ClickHouse/issues/32639) Closes: [#9954](https://github.com/ClickHouse/ClickHouse/issues/9954) Closes: [#41964](https://github.com/ClickHouse/ClickHouse/issues/41964) Closes: [#54317](https://github.com/ClickHouse/ClickHouse/issues/54317) Closes: [#7520](https://github.com/ClickHouse/ClickHouse/issues/7520) Closes: [#36973](https://github.com/ClickHouse/ClickHouse/issues/36973) Closes: [#40955](https://github.com/ClickHouse/ClickHouse/issues/40955) Closes: [#19687](https://github.com/ClickHouse/ClickHouse/issues/19687) Closes: [#23104](https://github.com/ClickHouse/ClickHouse/issues/23104) Closes: [#21584](https://github.com/ClickHouse/ClickHouse/issues/21584) Closes: [#23344](https://github.com/ClickHouse/ClickHouse/issues/23344) Closes: [#22627](https://github.com/ClickHouse/ClickHouse/issues/22627) Closes: [#10276](https://github.com/ClickHouse/ClickHouse/issues/10276) Closes: [#19687](https://github.com/ClickHouse/ClickHouse/issues/19687) Closes: [#4567](https://github.com/ClickHouse/ClickHouse/issues/4567) Closes: [#17710](https://github.com/ClickHouse/ClickHouse/issues/17710) Closes: [#11068](https://github.com/ClickHouse/ClickHouse/issues/11068) Closes: [#24395](https://github.com/ClickHouse/ClickHouse/issues/24395) Closes: [#23416](https://github.com/ClickHouse/ClickHouse/issues/23416) Closes: [#23162](https://github.com/ClickHouse/ClickHouse/issues/23162) Closes: [#25655](https://github.com/ClickHouse/ClickHouse/issues/25655) Closes: [#11757](https://github.com/ClickHouse/ClickHouse/issues/11757) Closes: [#6571](https://github.com/ClickHouse/ClickHouse/issues/6571) Closes: [#4432](https://github.com/ClickHouse/ClickHouse/issues/4432) Closes: [#8259](https://github.com/ClickHouse/ClickHouse/issues/8259) Closes: [#9233](https://github.com/ClickHouse/ClickHouse/issues/9233) Closes: [#14699](https://github.com/ClickHouse/ClickHouse/issues/14699) Closes: [#27068](https://github.com/ClickHouse/ClickHouse/issues/27068) Closes: 
[#28687](https://github.com/ClickHouse/ClickHouse/issues/28687) Closes: [#28777](https://github.com/ClickHouse/ClickHouse/issues/28777) Closes: [#29734](https://github.com/ClickHouse/ClickHouse/issues/29734) Closes: [#61238](https://github.com/ClickHouse/ClickHouse/issues/61238) Closes: [#33825](https://github.com/ClickHouse/ClickHouse/issues/33825) Closes: [#35608](https://github.com/ClickHouse/ClickHouse/issues/35608) Closes: [#29838](https://github.com/ClickHouse/ClickHouse/issues/29838) Closes: [#35652](https://github.com/ClickHouse/ClickHouse/issues/35652) Closes: [#36189](https://github.com/ClickHouse/ClickHouse/issues/36189) Closes: [#39634](https://github.com/ClickHouse/ClickHouse/issues/39634) Closes: [#47432](https://github.com/ClickHouse/ClickHouse/issues/47432) Closes: [#54910](https://github.com/ClickHouse/ClickHouse/issues/54910) Closes: [#57321](https://github.com/ClickHouse/ClickHouse/issues/57321) Closes: [#59154](https://github.com/ClickHouse/ClickHouse/issues/59154) Closes: [#61014](https://github.com/ClickHouse/ClickHouse/issues/61014) Closes: [#61950](https://github.com/ClickHouse/ClickHouse/issues/61950) Closes: [#55647](https://github.com/ClickHouse/ClickHouse/issues/55647) Closes: [#61947](https://github.com/ClickHouse/ClickHouse/issues/61947). [#62185](https://github.com/ClickHouse/ClickHouse/pull/62185) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Add more tests from issues which are no longer relevant or fixed by analyzer. Closes: [#58985](https://github.com/ClickHouse/ClickHouse/issues/58985) Closes: [#59549](https://github.com/ClickHouse/ClickHouse/issues/59549) Closes: [#36963](https://github.com/ClickHouse/ClickHouse/issues/36963) Closes: [#39453](https://github.com/ClickHouse/ClickHouse/issues/39453) Closes: [#56521](https://github.com/ClickHouse/ClickHouse/issues/56521) Closes: [#47552](https://github.com/ClickHouse/ClickHouse/issues/47552) Closes: [#56503](https://github.com/ClickHouse/ClickHouse/issues/56503) Closes: [#59101](https://github.com/ClickHouse/ClickHouse/issues/59101) Closes: [#50271](https://github.com/ClickHouse/ClickHouse/issues/50271) Closes: [#54954](https://github.com/ClickHouse/ClickHouse/issues/54954) Closes: [#56466](https://github.com/ClickHouse/ClickHouse/issues/56466) Closes: [#11000](https://github.com/ClickHouse/ClickHouse/issues/11000) Closes: [#10894](https://github.com/ClickHouse/ClickHouse/issues/10894) Closes: https://github.com/ClickHouse/ClickHouse/issues/448 Closes: [#8030](https://github.com/ClickHouse/ClickHouse/issues/8030) Closes: [#32139](https://github.com/ClickHouse/ClickHouse/issues/32139) Closes: [#47288](https://github.com/ClickHouse/ClickHouse/issues/47288) Closes: [#50705](https://github.com/ClickHouse/ClickHouse/issues/50705) Closes: [#54511](https://github.com/ClickHouse/ClickHouse/issues/54511) Closes: [#55466](https://github.com/ClickHouse/ClickHouse/issues/55466) Closes: [#58500](https://github.com/ClickHouse/ClickHouse/issues/58500) Closes: [#39923](https://github.com/ClickHouse/ClickHouse/issues/39923) Closes: [#39855](https://github.com/ClickHouse/ClickHouse/issues/39855) Closes: [#4596](https://github.com/ClickHouse/ClickHouse/issues/4596) Closes: [#47422](https://github.com/ClickHouse/ClickHouse/issues/47422) Closes: [#33000](https://github.com/ClickHouse/ClickHouse/issues/33000) Closes: [#14739](https://github.com/ClickHouse/ClickHouse/issues/14739) Closes: [#44039](https://github.com/ClickHouse/ClickHouse/issues/44039) Closes: [#8547](https://github.com/ClickHouse/ClickHouse/issues/8547) Closes: [#22923](https://github.com/ClickHouse/ClickHouse/issues/22923) Closes: [#23865](https://github.com/ClickHouse/ClickHouse/issues/23865) Closes: [#29748](https://github.com/ClickHouse/ClickHouse/issues/29748) Closes: [#4222](https://github.com/ClickHouse/ClickHouse/issues/4222). [#62457](https://github.com/ClickHouse/ClickHouse/pull/62457) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fixed build errors when OpenSSL is linked dynamically (note: this is generally unsupported and only required for IBM's s390x platforms). [#62888](https://github.com/ClickHouse/ClickHouse/pull/62888) ([Harry Lee](https://github.com/HarryLeeIBM)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix logical error when undoing quorum insert transaction. [#61953](https://github.com/ClickHouse/ClickHouse/pull/61953) ([Han Fei](https://github.com/hanfei1991)).
* Fix parser error when using COUNT(*) with FILTER clause [#61357](https://github.com/ClickHouse/ClickHouse/pull/61357) ([Duc Canh Le](https://github.com/canhld94)).
* Fix logical error in `group_by_use_nulls` + grouping sets + analyzer + materialize/constant [#61567](https://github.com/ClickHouse/ClickHouse/pull/61567) ([Kruglov Pavel](https://github.com/Avogar)).
* Cancel merges before removing moved parts [#61610](https://github.com/ClickHouse/ClickHouse/pull/61610) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix abort in Apache Arrow [#61720](https://github.com/ClickHouse/ClickHouse/pull/61720) ([Kruglov Pavel](https://github.com/Avogar)).
* Search for `convert_to_replicated` flag at the correct path corresponding to the specific disk [#61769](https://github.com/ClickHouse/ClickHouse/pull/61769) ([Kirill](https://github.com/kirillgarbar)).
* Fix possible connections data-race for distributed_foreground_insert/distributed_background_insert_batch [#61867](https://github.com/ClickHouse/ClickHouse/pull/61867) ([Azat Khuzhin](https://github.com/azat)).
* Mark CANNOT_PARSE_ESCAPE_SEQUENCE error as parse error to be able to skip it in row input formats [#61883](https://github.com/ClickHouse/ClickHouse/pull/61883) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix writing exception message in output format in HTTP when http_wait_end_of_query is used [#61951](https://github.com/ClickHouse/ClickHouse/pull/61951) ([Kruglov Pavel](https://github.com/Avogar)).
* Proper fix for LowCardinality together with JSONExtract functions [#61957](https://github.com/ClickHouse/ClickHouse/pull/61957) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix crash in the Merge engine when a row policy does not have an expression [#61971](https://github.com/ClickHouse/ClickHouse/pull/61971) ([Ilya Golshtein](https://github.com/ilejn)).
* Fix WriteBufferAzureBlobStorage destructor uncaught exception [#61988](https://github.com/ClickHouse/ClickHouse/pull/61988) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix CREATE TABLE without columns definition for ReplicatedMergeTree [#62040](https://github.com/ClickHouse/ClickHouse/pull/62040) ([Azat Khuzhin](https://github.com/azat)).
* Fix optimize_skip_unused_shards_rewrite_in for composite sharding key [#62047](https://github.com/ClickHouse/ClickHouse/pull/62047) ([Azat Khuzhin](https://github.com/azat)).
* ReadWriteBufferFromHTTP: set the correct Host header when redirected [#62068](https://github.com/ClickHouse/ClickHouse/pull/62068) ([Sema Checherinda](https://github.com/CheSema)).
* Fix external tables failing to parse the data type Bool [#62115](https://github.com/ClickHouse/ClickHouse/pull/62115) ([Duc Canh Le](https://github.com/canhld94)).
* Analyzer: Fix query parameter resolution [#62186](https://github.com/ClickHouse/ClickHouse/pull/62186) ([Dmitry Novik](https://github.com/novikd)).
* Fix restoring parts while readonly [#62207](https://github.com/ClickHouse/ClickHouse/pull/62207) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix crash in index definition containing SQL UDF [#62225](https://github.com/ClickHouse/ClickHouse/pull/62225) ([vdimir](https://github.com/vdimir)).
* Fixing NULL random seed for generateRandom with analyzer. [#62248](https://github.com/ClickHouse/ClickHouse/pull/62248) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Correctly handle const columns in DistinctTransform [#62250](https://github.com/ClickHouse/ClickHouse/pull/62250) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix Parts Splitter for queries with the FINAL modifier [#62268](https://github.com/ClickHouse/ClickHouse/pull/62268) ([Nikita Taranov](https://github.com/nickitat)).
* Analyzer: Fix alias to parametrized view resolution [#62274](https://github.com/ClickHouse/ClickHouse/pull/62274) ([Dmitry Novik](https://github.com/novikd)).
* Analyzer: Fix name resolution from parent scopes [#62281](https://github.com/ClickHouse/ClickHouse/pull/62281) ([Dmitry Novik](https://github.com/novikd)).
* Fix argMax with nullable non native numeric column [#62285](https://github.com/ClickHouse/ClickHouse/pull/62285) ([Raúl Marín](https://github.com/Algunenano)).
* Fix BACKUP and RESTORE of a materialized view in Ordinary database [#62295](https://github.com/ClickHouse/ClickHouse/pull/62295) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix data race on scalars in Context [#62305](https://github.com/ClickHouse/ClickHouse/pull/62305) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix primary key in materialized view [#62319](https://github.com/ClickHouse/ClickHouse/pull/62319) ([Murat Khairulin](https://github.com/mxwell)).
* Do not build multithread insert pipeline for tables without support [#62333](https://github.com/ClickHouse/ClickHouse/pull/62333) ([vdimir](https://github.com/vdimir)).
* Fix analyzer with positional arguments in distributed query [#62362](https://github.com/ClickHouse/ClickHouse/pull/62362) ([flynn](https://github.com/ucasfl)).
* Fix filter pushdown from additional_table_filters in Merge engine in analyzer [#62398](https://github.com/ClickHouse/ClickHouse/pull/62398) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix GLOBAL IN table queries with analyzer. [#62409](https://github.com/ClickHouse/ClickHouse/pull/62409) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Respect settings truncate_on_insert/create_new_file_on_insert in s3/hdfs/azure engines during partitioned write [#62425](https://github.com/ClickHouse/ClickHouse/pull/62425) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix backup restore path for AzureBlobStorage [#62447](https://github.com/ClickHouse/ClickHouse/pull/62447) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix SimpleSquashingChunksTransform [#62451](https://github.com/ClickHouse/ClickHouse/pull/62451) ([Nikita Taranov](https://github.com/nickitat)).
* Fix capture of nested lambda. [#62462](https://github.com/ClickHouse/ClickHouse/pull/62462) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Avoid crash when reading protobuf with recursive types [#62506](https://github.com/ClickHouse/ClickHouse/pull/62506) ([Raúl Marín](https://github.com/Algunenano)).
* Fix a bug when moving a partition from a table to itself [#62524](https://github.com/ClickHouse/ClickHouse/pull/62524) ([helifu](https://github.com/helifu)).
* Fix scalar subquery in LIMIT [#62567](https://github.com/ClickHouse/ClickHouse/pull/62567) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix segfault in the experimental and unsupported Hive engine, which we don't like anyway [#62578](https://github.com/ClickHouse/ClickHouse/pull/62578) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix memory leak in groupArraySorted [#62597](https://github.com/ClickHouse/ClickHouse/pull/62597) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix crash in largestTriangleThreeBuckets [#62646](https://github.com/ClickHouse/ClickHouse/pull/62646) ([Raúl Marín](https://github.com/Algunenano)).
* Fix tumble\[Start,End\] and hop\[Start,End\] for bigger resolutions [#62705](https://github.com/ClickHouse/ClickHouse/pull/62705) ([Jordi Villar](https://github.com/jrdi)).
* Fix argMin/argMax combinator state [#62708](https://github.com/ClickHouse/ClickHouse/pull/62708) ([Raúl Marín](https://github.com/Algunenano)).
* Fix temporary data in cache failing because of cache lock contention optimization [#62715](https://github.com/ClickHouse/ClickHouse/pull/62715) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix crash in function `mergeTreeIndex` [#62762](https://github.com/ClickHouse/ClickHouse/pull/62762) ([Anton Popov](https://github.com/CurtizJ)).
* Fix size checks when updating nested materialized columns [#62773](https://github.com/ClickHouse/ClickHouse/pull/62773) ([Eliot Hautefeuille](https://github.com/hileef)).
* Fix FINAL modifier is not respected in CTE with analyzer [#62811](https://github.com/ClickHouse/ClickHouse/pull/62811) ([Duc Canh Le](https://github.com/canhld94)).
* Fix crash in function `formatRow` with `JSON` format and HTTP interface [#62840](https://github.com/ClickHouse/ClickHouse/pull/62840) ([Anton Popov](https://github.com/CurtizJ)).
* Azure: fix building final url from endpoint object [#62850](https://github.com/ClickHouse/ClickHouse/pull/62850) ([Daniel Pozo Escalona](https://github.com/danipozo)).
* Fix GCD codec [#62853](https://github.com/ClickHouse/ClickHouse/pull/62853) ([Nikita Taranov](https://github.com/nickitat)).
* Fix LowCardinality(Nullable) key in hyperrectangle [#62866](https://github.com/ClickHouse/ClickHouse/pull/62866) ([Amos Bird](https://github.com/amosbird)).
* Fix `fromUnixTimestamp` in Joda syntax when the input value is beyond UInt32 [#62901](https://github.com/ClickHouse/ClickHouse/pull/62901) ([KevinyhZou](https://github.com/KevinyhZou)).
* Disable optimize_rewrite_aggregate_function_with_if for sum(nullable) [#62912](https://github.com/ClickHouse/ClickHouse/pull/62912) ([Raúl Marín](https://github.com/Algunenano)).
* Fix PREWHERE for StorageBuffer with different source table column types. [#62916](https://github.com/ClickHouse/ClickHouse/pull/62916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix temporary data in cache incorrectly processing failure of cache key directory creation [#62925](https://github.com/ClickHouse/ClickHouse/pull/62925) ([Kseniia Sumarokova](https://github.com/kssenii)).
* gRPC: fix crash on IPv6 peer connection [#62978](https://github.com/ClickHouse/ClickHouse/pull/62978) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Fix possible CHECKSUM_DOESNT_MATCH (and others) during replicated fetches [#62987](https://github.com/ClickHouse/ClickHouse/pull/62987) ([Azat Khuzhin](https://github.com/azat)).
* Fix terminate with uncaught exception in temporary data in cache [#62998](https://github.com/ClickHouse/ClickHouse/pull/62998) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix optimize_rewrite_aggregate_function_with_if implicit cast [#62999](https://github.com/ClickHouse/ClickHouse/pull/62999) ([Raúl Marín](https://github.com/Algunenano)).
* Fix unhandled exception in ~RestorerFromBackup [#63040](https://github.com/ClickHouse/ClickHouse/pull/63040) ([Vitaly Baranov](https://github.com/vitlibar)).
* Do not remove server constants from GROUP BY key for secondary query. [#63047](https://github.com/ClickHouse/ClickHouse/pull/63047) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix incorrect judgement of monotonicity of the function `abs` [#63097](https://github.com/ClickHouse/ClickHouse/pull/63097) ([Duc Canh Le](https://github.com/canhld94)).
* Set server name for SSL handshake in MongoDB engine [#63122](https://github.com/ClickHouse/ClickHouse/pull/63122) ([Alexander Gololobov](https://github.com/davenger)).
* Use user specified db instead of "config" for MongoDB wire protocol version check [#63126](https://github.com/ClickHouse/ClickHouse/pull/63126) ([Alexander Gololobov](https://github.com/davenger)).
### <a id="243"></a> ClickHouse release 24.3 LTS, 2024-03-27
#### Upgrade Notes
@ -38,9 +201,9 @@
* Optimized function `dotProduct` to omit unnecessary and expensive memory copies. [#60928](https://github.com/ClickHouse/ClickHouse/pull/60928) ([Robert Schulze](https://github.com/rschu1ze)).
* 30x faster printing for 256-bit integers. [#61100](https://github.com/ClickHouse/ClickHouse/pull/61100) ([Raúl Marín](https://github.com/Algunenano)).
* If the table's primary key contains mostly useless columns, don't keep them in memory. This is controlled by a new setting `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns` with the value `0.9` by default, which means: for a composite primary key, if a column changes its value for at least 0.9 of all the times, the columns after it will not be loaded. [#60255](https://github.com/ClickHouse/ClickHouse/pull/60255) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve the performance of serialized aggregation method when involving multiple `Nullable` columns. [#55809](https://github.com/ClickHouse/ClickHouse/pull/55809) ([Amos Bird](https://github.com/amosbird)).
* Lazy build JSON's output to improve performance of ALL JOIN. [#58278](https://github.com/ClickHouse/ClickHouse/pull/58278) ([LiuNeng](https://github.com/liuneng1994)).
* Make HTTP/HTTPs connections with external services, such as AWS S3 reusable for all uses cases. Even when response is 3xx or 4xx. [#58845](https://github.com/ClickHouse/ClickHouse/pull/58845) ([Sema Checherinda](https://github.com/CheSema)).
* Improve the performance of serialized aggregation methods when involving multiple `Nullable` columns. [#55809](https://github.com/ClickHouse/ClickHouse/pull/55809) ([Amos Bird](https://github.com/amosbird)).
* Lazy builds JSON's output to improve performance of ALL JOIN. [#58278](https://github.com/ClickHouse/ClickHouse/pull/58278) ([LiuNeng](https://github.com/liuneng1994)).
* Make HTTP/HTTPs connections with external services, such as AWS S3 reusable for all use cases. Even when the response is 3xx or 4xx. [#58845](https://github.com/ClickHouse/ClickHouse/pull/58845) ([Sema Checherinda](https://github.com/CheSema)).
* Improvements to aggregate functions `argMin` / `argMax` / `any` / `anyLast` / `anyHeavy`, as well as `ORDER BY {u8/u16/u32/u64/i8/i16/i32/i64} LIMIT 1` queries. [#58640](https://github.com/ClickHouse/ClickHouse/pull/58640) ([Raúl Marín](https://github.com/Algunenano)).
* Trivial optimization for column's filter. Peak memory can be reduced to 44% of the original in some cases. [#59698](https://github.com/ClickHouse/ClickHouse/pull/59698) ([李扬](https://github.com/taiyang-li)).
* Execute the `multiIf` function in a columnar fashion when the result type's underlying type is a number (see the sketch after this list). [#60384](https://github.com/ClickHouse/ClickHouse/pull/60384) ([李扬](https://github.com/taiyang-li)).
@ -49,7 +212,7 @@
* Optimize data movement between columns of a Nullable number or a Nullable string, which improves some micro-benchmarks. [#60846](https://github.com/ClickHouse/ClickHouse/pull/60846) ([李扬](https://github.com/taiyang-li)).
* Operations with the filesystem cache will suffer less from the lock contention. [#61066](https://github.com/ClickHouse/ClickHouse/pull/61066) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Optimize array join and other JOINs by preventing a wrong compiler's optimization. Close [#61074](https://github.com/ClickHouse/ClickHouse/issues/61074). [#61075](https://github.com/ClickHouse/ClickHouse/pull/61075) ([李扬](https://github.com/taiyang-li)).
* If a query with a syntax error contained `COLUMNS` matcher with a regular expression, the regular expression was compiled each time during the parser's backtracking, instead of being compiled once. This was a fundamental error. The compiled regexp was put to AST. But the letter A in AST means "abstract" which means it should not contain heavyweight objects. Parts of AST can be created and discarded during parsing, including a large number of backtracking. This leads to slowness on the parsing side and consequently allows DoS by a readonly user. But the main problem is that it prevents progress in fuzzers. [#61543](https://github.com/ClickHouse/ClickHouse/pull/61543) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* If a query with a syntax error contained the `COLUMNS` matcher with a regular expression, the regular expression was compiled each time during the parser's backtracking, instead of being compiled once. This was a fundamental error. The compiled regexp was put to AST. But the letter A in AST means "abstract" which means it should not contain heavyweight objects. Parts of AST can be created and discarded during parsing, including a large number of backtracking. This leads to slowness on the parsing side and consequently allows DoS by a readonly user. But the main problem is that it prevents progress in fuzzers. [#61543](https://github.com/ClickHouse/ClickHouse/pull/61543) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a new analyzer pass to optimize the IN operator for a single value. [#61564](https://github.com/ClickHouse/ClickHouse/pull/61564) ([LiuNeng](https://github.com/liuneng1994)).
* DNSResolver shuffles set of resolved IPs which is needed to uniformly utilize multiple endpoints of AWS S3. [#60965](https://github.com/ClickHouse/ClickHouse/pull/60965) ([Sema Checherinda](https://github.com/CheSema)).
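A minimal sketch of the columnar `multiIf` case referenced above (hypothetical table and column names):

```sql
-- All branches are numeric, so multiIf can be evaluated column-wise.
SELECT multiIf(delta > 0, 1, delta < 0, -1, 0) AS sign_of_delta
FROM measurements;
```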

View File

@ -66,9 +66,11 @@ public:
/// The thread and process ids are set.
Message(
const std::string & source, const std::string & text, Priority prio, const char * file, int line, std::string_view fmt_str = {});
const std::string & source, const std::string & text, Priority prio, const char * file, int line,
std::string_view fmt_str = {}, const std::vector<std::string> & fmt_str_args = {});
Message(
std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str);
std::string && source, std::string && text, Priority prio, const char * file, int line,
std::string_view fmt_str, std::vector<std::string> && fmt_str_args);
/// Creates a Message with the given source, text, priority,
/// source file path and line.
///
@ -161,6 +163,9 @@ public:
std::string_view getFormatString() const;
void setFormatString(std::string_view fmt_str);
const std::vector<std::string> & getFormatStringArgs() const;
void setFormatStringArgs(const std::vector<std::string> & fmt_str_args);
int getSourceLine() const;
/// Returns the source file line of the statement
/// generating the log message. May be 0
@ -210,6 +215,7 @@ private:
int _line;
StringMap * _pMap;
std::string_view _fmt_str;
std::vector<std::string> _fmt_str_args;
};

View File

@ -46,7 +46,9 @@ Message::Message(const std::string& source, const std::string& text, Priority pr
}
Message::Message(const std::string& source, const std::string& text, Priority prio, const char* file, int line, std::string_view fmt_str):
Message::Message(
const std::string& source, const std::string& text, Priority prio, const char* file, int line,
std::string_view fmt_str, const std::vector<std::string>& fmt_str_args):
_source(source),
_text(text),
_prio(prio),
@ -54,13 +56,16 @@ Message::Message(const std::string& source, const std::string& text, Priority pr
_file(file),
_line(line),
_pMap(0),
_fmt_str(fmt_str)
_fmt_str(fmt_str),
_fmt_str_args(fmt_str_args)
{
init();
}
Message::Message(std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str):
Message::Message(
std::string && source, std::string && text, Priority prio, const char * file, int line,
std::string_view fmt_str, std::vector<std::string> && fmt_str_args):
_source(std::move(source)),
_text(std::move(text)),
_prio(prio),
@ -68,7 +73,8 @@ Message::Message(std::string && source, std::string && text, Priority prio, cons
_file(file),
_line(line),
_pMap(0),
_fmt_str(fmt_str)
_fmt_str(fmt_str),
_fmt_str_args(std::move(fmt_str_args))
{
init();
}
@ -83,7 +89,8 @@ Message::Message(const Message& msg):
_pid(msg._pid),
_file(msg._file),
_line(msg._line),
_fmt_str(msg._fmt_str)
_fmt_str(msg._fmt_str),
_fmt_str_args(msg._fmt_str_args)
{
if (msg._pMap)
_pMap = new StringMap(*msg._pMap);
@ -102,7 +109,8 @@ Message::Message(const Message& msg, const std::string& text):
_pid(msg._pid),
_file(msg._file),
_line(msg._line),
_fmt_str(msg._fmt_str)
_fmt_str(msg._fmt_str),
_fmt_str_args(msg._fmt_str_args)
{
if (msg._pMap)
_pMap = new StringMap(*msg._pMap);
@ -154,6 +162,7 @@ void Message::swap(Message& msg)
swap(_line, msg._line);
swap(_pMap, msg._pMap);
swap(_fmt_str, msg._fmt_str);
swap(_fmt_str_args, msg._fmt_str_args);
}
@ -227,6 +236,17 @@ void Message::setFormatString(std::string_view fmt_str)
}
const std::vector<std::string>& Message::getFormatStringArgs() const
{
return _fmt_str_args;
}
void Message::setFormatStringArgs(const std::vector<std::string>& fmt_str_args)
{
_fmt_str_args = fmt_str_args;
}
bool Message::has(const std::string& param) const
{
return _pMap && (_pMap->find(param) != _pMap->end());

View File

@ -8,9 +8,6 @@ option (SANITIZE "Enable one of the code sanitizers" "")
set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
# It's possible to pass an ignore list to sanitizers (-fsanitize-ignorelist). Intentionally not doing this because
# 1. out-of-source suppressions are awkward 2. it seems ignore lists don't work after the Clang v16 upgrade (#49829)
if (SANITIZE)
if (SANITIZE STREQUAL "address")
set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")

2
contrib/curl vendored

@ -1 +1 @@
Subproject commit 1a05e833f8f7140628b27882b10525fd9ec4b873
Subproject commit de7b3e89218467159a7af72d58cea8425946e97d

View File

@ -33,14 +33,15 @@ set (SRCS
"${LIBRARY_DIR}/lib/curl_memrchr.c"
"${LIBRARY_DIR}/lib/curl_multibyte.c"
"${LIBRARY_DIR}/lib/curl_ntlm_core.c"
"${LIBRARY_DIR}/lib/curl_ntlm_wb.c"
"${LIBRARY_DIR}/lib/curl_path.c"
"${LIBRARY_DIR}/lib/curl_range.c"
"${LIBRARY_DIR}/lib/curl_rtmp.c"
"${LIBRARY_DIR}/lib/curl_sasl.c"
"${LIBRARY_DIR}/lib/curl_sha512_256.c"
"${LIBRARY_DIR}/lib/curl_sspi.c"
"${LIBRARY_DIR}/lib/curl_threads.c"
"${LIBRARY_DIR}/lib/curl_trc.c"
"${LIBRARY_DIR}/lib/cw-out.c"
"${LIBRARY_DIR}/lib/dict.c"
"${LIBRARY_DIR}/lib/doh.c"
"${LIBRARY_DIR}/lib/dynbuf.c"
@ -98,6 +99,7 @@ set (SRCS
"${LIBRARY_DIR}/lib/psl.c"
"${LIBRARY_DIR}/lib/rand.c"
"${LIBRARY_DIR}/lib/rename.c"
"${LIBRARY_DIR}/lib/request.c"
"${LIBRARY_DIR}/lib/rtsp.c"
"${LIBRARY_DIR}/lib/select.c"
"${LIBRARY_DIR}/lib/sendf.c"

View File

@ -38,6 +38,7 @@
#define HAVE_ARPA_INET_H
#define HAVE_ERRNO_H
#define HAVE_GETSOCKNAME
#define HAVE_FCNTL_H
#define HAVE_NETDB_H
#define HAVE_NETINET_IN_H

2
contrib/openssl vendored

@ -1 +1 @@
Subproject commit 417f9d2825799769708d99917d0465574c36f79a
Subproject commit f7b8721dfc66abb147f24ca07b9c9d1d64f40f71

View File

@ -93,6 +93,7 @@ enable_language(ASM)
if(COMPILER_CLANG)
add_definitions(-Wno-unused-command-line-argument)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=lld") # only relevant for -DENABLE_OPENSSL_DYNAMIC=1
endif()
if(ARCH_AMD64)
@ -960,11 +961,6 @@ set(CRYPTO_SRC
${OPENSSL_SOURCE_DIR}/crypto/x509/x_req.c
${OPENSSL_SOURCE_DIR}/crypto/x509/x_x509.c
${OPENSSL_SOURCE_DIR}/crypto/x509/x_x509a.c
${OPENSSL_SOURCE_DIR}/engines/e_capi.c
${OPENSSL_SOURCE_DIR}/engines/e_dasync.c
${OPENSSL_SOURCE_DIR}/engines/e_loader_attic.c
${OPENSSL_SOURCE_DIR}/engines/e_ossltest.c
${OPENSSL_SOURCE_DIR}/engines/e_padlock.c
${OPENSSL_SOURCE_DIR}/providers/baseprov.c
${OPENSSL_SOURCE_DIR}/providers/common/bio_prov.c
${OPENSSL_SOURCE_DIR}/providers/common/capabilities.c
@ -985,8 +981,6 @@ set(CRYPTO_SRC
${OPENSSL_SOURCE_DIR}/providers/common/securitycheck.c
${OPENSSL_SOURCE_DIR}/providers/common/securitycheck_default.c
${OPENSSL_SOURCE_DIR}/providers/defltprov.c
${OPENSSL_SOURCE_DIR}/providers/fips/fips_entry.c
${OPENSSL_SOURCE_DIR}/providers/fips/fipsprov.c
${OPENSSL_SOURCE_DIR}/providers/implementations/asymciphers/rsa_enc.c
${OPENSSL_SOURCE_DIR}/providers/implementations/asymciphers/sm2_enc.c
${OPENSSL_SOURCE_DIR}/providers/implementations/ciphers/cipher_aes.c
@ -1145,11 +1139,19 @@ set(CRYPTO_SRC
${OPENSSL_SOURCE_DIR}/providers/implementations/signature/sm2_sig.c
${OPENSSL_SOURCE_DIR}/providers/implementations/storemgmt/file_store.c
${OPENSSL_SOURCE_DIR}/providers/implementations/storemgmt/file_store_any2obj.c
${OPENSSL_SOURCE_DIR}/providers/legacyprov.c
${OPENSSL_SOURCE_DIR}/providers/nullprov.c
${OPENSSL_SOURCE_DIR}/providers/prov_running.c
${OPENSSL_SOURCE_DIR}/ssl/record/methods/tls_pad.c
${OPENSSL_SOURCE_DIR}/ssl/record/methods/ssl3_cbc.c
)
if(NOT ENABLE_OPENSSL_DYNAMIC)
set(CRYPTO_SRC ${CRYPTO_SRC}
${OPENSSL_SOURCE_DIR}/providers/fips/fips_entry.c
${OPENSSL_SOURCE_DIR}/providers/fips/fipsprov.c
)
endif()
if(ARCH_AMD64)
if (OS_DARWIN)
set(CRYPTO_SRC ${CRYPTO_SRC}
@ -1376,8 +1378,6 @@ set(SSL_SRC
${OPENSSL_SOURCE_DIR}/ssl/quic/uint_set.c
${OPENSSL_SOURCE_DIR}/ssl/record/rec_layer_d1.c
${OPENSSL_SOURCE_DIR}/ssl/record/rec_layer_s3.c
${OPENSSL_SOURCE_DIR}/ssl/record/methods/tls_pad.c
${OPENSSL_SOURCE_DIR}/ssl/record/methods/ssl3_cbc.c
${OPENSSL_SOURCE_DIR}/ssl/record/methods/dtls_meth.c
${OPENSSL_SOURCE_DIR}/ssl/record/methods/ssl3_meth.c
${OPENSSL_SOURCE_DIR}/ssl/record/methods/tls13_meth.c

View File

@ -14,11 +14,14 @@ RUN curl -o krb5-libs-1.10.3-65.el6.x86_64.rpm ftp://ftp.pbone.net/mirror/vault.
rpm -Uvh libkadm5-1.10.3-65.el6.x86_64.rpm libss-1.41.12-24.el6.x86_64.rpm krb5-libs-1.10.3-65.el6.x86_64.rpm krb5-workstation-1.10.3-65.el6.x86_64.rpm libcom_err-1.41.12-24.el6.x86_64.rpm && \
rm -fr *.rpm
ADD https://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz /tmp/commons-daemon-1.0.15-src.tar.gz
RUN cd /tmp && \
curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \
tar xzf commons-daemon-1.0.15-src.tar.gz && \
cd commons-daemon-1.0.15-src/src/native/unix && \
./configure && \
make && \
cp ./jsvc /usr/local/hadoop-2.7.0/sbin && \
[ -e /usr/local/hadoop ] || ln -s ./hadoop-2.7.0 /usr/local/hadoop
cd /tmp && \
rm -rf commons-daemon-1.0.15-src* && \
{ [ -e /usr/local/hadoop ] || ln -s ./hadoop-2.7.0 /usr/local/hadoop; }

View File

@ -19,7 +19,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
# install test configs
/usr/share/clickhouse-test/config/install.sh
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --silent --inMemoryPersistence &
./setup_minio.sh stateful
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
@ -87,7 +87,7 @@ function start()
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
break
fi
timeout 120 service clickhouse-server start
timeout 120 sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid
sleep 0.5
counter=$((counter + 1))
done

View File

@ -42,14 +42,6 @@ source /utils.lib
# install test configs
/usr/share/clickhouse-test/config/install.sh
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
echo "Azure is disabled"
elif [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
echo "Azure is disabled"
else
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
fi
./setup_minio.sh stateless
./setup_hdfs_minicluster.sh
@ -99,12 +91,11 @@ if [ "$NUM_TRIES" -gt "1" ]; then
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US_MAX=10000
mkdir -p /var/run/clickhouse-server
# simplest way to forward env variables to server
sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid
else
sudo clickhouse start
fi
# simplest way to forward env variables to server
sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
sudo sed -i "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_1/</filesystem_caches_path>|" /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
@ -214,6 +205,14 @@ function run_tests()
ADDITIONAL_OPTIONS+=('--s3-storage')
fi
if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
# to disable the same tests
ADDITIONAL_OPTIONS+=('--s3-storage')
# azurite is slow, but with these two settings it can be super slow
ADDITIONAL_OPTIONS+=('--no-random-settings')
ADDITIONAL_OPTIONS+=('--no-random-merge-tree-settings')
fi
if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--shared-catalog')
fi
@ -288,7 +287,7 @@ stop_logs_replication
failed_to_save_logs=0
for table in query_log zookeeper_log trace_log transactions_info_log metric_log
do
err=$( { clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst; } 2>&1 )
err=$(clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.gz' format TSVWithNamesAndTypes")
echo "$err"
[[ "0" != "${#err}" ]] && failed_to_save_logs=1
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then

View File

@ -279,7 +279,7 @@ function check_logs_for_critical_errors()
function collect_query_and_trace_logs()
{
for table in query_log trace_log
for table in query_log trace_log metric_log
do
clickhouse-local --config-file=/etc/clickhouse-server/config.xml --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
done

View File

@ -52,7 +52,6 @@ export ZOOKEEPER_FAULT_INJECTION=1
# available for dump via clickhouse-local
configure
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
./setup_minio.sh stateless # to have a proper environment
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml

View File

@ -0,0 +1,37 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.8.13.25-lts (37e034f903e) FIXME as compared to v23.8.12.13-lts (bdbd0d87e5d)
#### Improvement
* Backported in [#61930](https://github.com/ClickHouse/ClickHouse/issues/61930): Fixed accounting of memory allocated before attaching thread to a query or a user. [#56089](https://github.com/ClickHouse/ClickHouse/pull/56089) ([Nikita Taranov](https://github.com/nickitat)).
#### Build/Testing/Packaging Improvement
* Backported in [#62007](https://github.com/ClickHouse/ClickHouse/issues/62007): Remove from the Keeper Docker image the volumes at /etc/clickhouse-keeper and /var/log/clickhouse-keeper. [#61683](https://github.com/ClickHouse/ClickHouse/pull/61683) ([Tristan](https://github.com/Tristan971)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix REPLACE/MOVE PARTITION with zero-copy replication [#54193](https://github.com/ClickHouse/ClickHouse/pull/54193) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix ATTACH query with external ON CLUSTER [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)).
* Cancel merges before removing moved parts [#61610](https://github.com/ClickHouse/ClickHouse/pull/61610) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Mark CANNOT_PARSE_ESCAPE_SEQUENCE error as parse error to be able to skip it in row input formats [#61883](https://github.com/ClickHouse/ClickHouse/pull/61883) ([Kruglov Pavel](https://github.com/Avogar)).
* Try to fix segfault in Hive engine [#62578](https://github.com/ClickHouse/ClickHouse/pull/62578) ([Nikolay Degterinsky](https://github.com/evillique)).
#### CI Fix or Improvement (changelog entry is not required)
* Backported in [#62201](https://github.com/ClickHouse/ClickHouse/issues/62201):. [#62190](https://github.com/ClickHouse/ClickHouse/pull/62190) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Backported in [#62796](https://github.com/ClickHouse/ClickHouse/issues/62796): We won't fail the job when GH fails to retrieve the job ID and URLs. [#62651](https://github.com/ClickHouse/ClickHouse/pull/62651) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#62968](https://github.com/ClickHouse/ClickHouse/issues/62968):. [#62932](https://github.com/ClickHouse/ClickHouse/pull/62932) ([Robert Schulze](https://github.com/rschu1ze)).
#### NO CL CATEGORY
* Backported in [#62585](https://github.com/ClickHouse/ClickHouse/issues/62585):. [#60078](https://github.com/ClickHouse/ClickHouse/pull/60078) ([Maksim Kita](https://github.com/kitaisreal)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Throw on query timeout in ZooKeeperRetries [#60922](https://github.com/ClickHouse/ClickHouse/pull/60922) ([Antonio Andelic](https://github.com/antonio2368)).

View File

@ -4,7 +4,7 @@ sidebar_position: 30
sidebar_label: Replicated
---
# [experimental] Replicated
# Replicated
The engine is based on the [Atomic](../../engines/database-engines/atomic.md) engine. It supports replication of metadata via DDL log being written to ZooKeeper and executed on all of the replicas for a given database.
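As a brief sketch of how such a database is typically declared (the ZooKeeper path and the shard/replica macros below are illustrative assumptions, not taken from this page):

```sql
-- Minimal sketch: metadata of db_repl is replicated via the given ZooKeeper path.
CREATE DATABASE db_repl
ENGINE = Replicated('/clickhouse/databases/db_repl', '{shard}', '{replica}');
```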

View File

@ -8,6 +8,8 @@ sidebar_label: HDFS
This engine provides integration with the [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by letting you manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) via ClickHouse. This engine is similar to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
This feature is not supported by ClickHouse engineers, and its quality is known to be sketchy. In case of any problems, fix them yourself and submit a pull request.
## Usage {#usage}
``` sql

View File

@ -1,19 +1,19 @@
---
slug: /en/engines/table-engines/mergetree-family/invertedindexes
sidebar_label: Inverted Indexes
sidebar_label: Full-text Indexes
description: Quickly find search terms in text.
keywords: [full-text search, text search, inverted, index, indices]
---
# Full-text Search using Inverted Indexes [experimental]
# Full-text Search using Full-text Indexes [experimental]
Inverted indexes are an experimental type of [secondary indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#available-types-of-indices) which provide fast text search
Full-text indexes are an experimental type of [secondary indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#available-types-of-indices) which provide fast text search
capabilities for [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md)
columns. The main idea of an inverted index is to store a mapping from "terms" to the rows which contain these terms. "Terms" are
columns. The main idea of a full-text index is to store a mapping from "terms" to the rows which contain these terms. "Terms" are
tokenized cells of the string column. For example, the string cell "I will be a little late" is by default tokenized into six terms "I", "will",
"be", "a", "little" and "late". Another kind of tokenizer is n-grams. For example, the result of 3-gram tokenization will be 21 terms "I w",
" wi", "wil", "ill", "ll ", "l b", " be" etc. The more fine-granular the input strings are tokenized, the bigger but also the more
useful the resulting inverted index will be.
useful the resulting full-text index will be.
<div class='vimeo-container'>
<iframe src="//www.youtube.com/embed/O_MnyUkrIq8"
@ -28,26 +28,26 @@ useful the resulting inverted index will be.
</div>
:::note
Inverted indexes are experimental and should not be used in production environments yet. They may change in the future in backward-incompatible
Full-text indexes are experimental and should not be used in production environments yet. They may change in the future in backward-incompatible
ways, for example with respect to their DDL/DQL syntax or performance/compression characteristics.
:::
## Usage
To use inverted indexes, first enable them in the configuration:
To use full-text indexes, first enable them in the configuration:
```sql
SET allow_experimental_inverted_index = true;
```
An inverted index can be defined on a string column using the following syntax
A full-text index can be defined on a string column using the following syntax
``` sql
CREATE TABLE tab
(
`key` UInt64,
`str` String,
INDEX inv_idx(str) TYPE inverted(0) GRANULARITY 1
INDEX inv_idx(str) TYPE full_text(0) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY key
@ -55,20 +55,20 @@ ORDER BY key
where `N` specifies the tokenizer:
- `inverted(0)` (or shorter: `inverted()`) set the tokenizer to "tokens", i.e. split strings along spaces,
- `inverted(N)` with `N` between 2 and 8 sets the tokenizer to "ngrams(N)"
- `full_text(0)` (or shorter: `full_text()`) sets the tokenizer to "tokens", i.e. split strings along spaces,
- `full_text(N)` with `N` between 2 and 8 sets the tokenizer to "ngrams(N)"
The maximum rows per postings list can be specified as the second parameter. This parameter can be used to control postings list sizes to avoid generating huge postings list files. The following variants exist:
- `inverted(ngrams, max_rows_per_postings_list)`: Use given max_rows_per_postings_list (assuming it is not 0)
- `inverted(ngrams, 0)`: No limitation of maximum rows per postings list
- `inverted(ngrams)`: Use a default maximum rows which is 64K.
- `full_text(ngrams, max_rows_per_postings_list)`: Use given max_rows_per_postings_list (assuming it is not 0)
- `full_text(ngrams, 0)`: No limitation of maximum rows per postings list
- `full_text(ngrams)`: Use a default maximum rows which is 64K.
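For illustration, a hypothetical table combining an n-gram tokenizer with an explicit postings-list limit could be declared as follows (the table, column, and parameter values are assumptions):

```sql
-- Requires SET allow_experimental_inverted_index = true (see above).
CREATE TABLE docs
(
    `id` UInt64,
    `body` String,
    -- 3-gram tokenizer, at most 10000 rows per postings list (illustrative values)
    INDEX body_idx(body) TYPE full_text(3, 10000) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id;
```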
Being a type of skipping index, inverted indexes can be dropped or added to a column after table creation:
Being a type of skipping index, full-text indexes can be dropped or added to a column after table creation:
``` sql
ALTER TABLE tab DROP INDEX inv_idx;
ALTER TABLE tab ADD INDEX inv_idx(s) TYPE inverted(2);
ALTER TABLE tab ADD INDEX inv_idx(s) TYPE full_text(2);
```
To use the index, no special functions or syntax are required. Typical string search predicates automatically leverage the index. As
@ -83,9 +83,9 @@ SELECT * from tab WHERE multiSearchAny(str, ['Hello', 'World']);
SELECT * from tab WHERE hasToken(str, 'Hello');
```
The inverted index also works on columns of type `Array(String)`, `Array(FixedString)`, `Map(String)` and `Map(String)`.
The full-text index also works on columns of type `Array(String)`, `Array(FixedString)`, `Map(String)` and `Map(String)`.
Like for other secondary indices, each column part has its own inverted index. Furthermore, each inverted index is internally divided into
Like for other secondary indices, each column part has its own full-text index. Furthermore, each full-text index is internally divided into
"segments". The existence and size of the segments are generally transparent to users but the segment size determines the memory consumption
during index construction (e.g. when two parts are merged). Configuration parameter "max_digestion_size_per_segment" (default: 256 MB)
controls the amount of data consumed from the underlying column before a new segment is created. Incrementing the parameter raises the
@ -94,7 +94,7 @@ average to evaluate a query.
## Full-text search of the Hacker News dataset
Let's look at the performance improvements of inverted indexes on a large dataset with lots of text. We will use 28.7M rows of comments on the popular Hacker News website. Here is the table without an inverted index:
Let's look at the performance improvements of full-text indexes on a large dataset with lots of text. We will use 28.7M rows of comments on the popular Hacker News website. Here is the table without a full-text index:
```sql
CREATE TABLE hackernews (
@ -162,11 +162,11 @@ Notice it takes 3 seconds to execute the query:
1 row in set. Elapsed: 3.001 sec. Processed 28.74 million rows, 9.75 GB (9.58 million rows/s., 3.25 GB/s.)
```
We will use `ALTER TABLE` and add an inverted index on the lowercase of the `comment` column, then materialize it (which can take a while - wait for it to materialize):
We will use `ALTER TABLE` and add a full-text index on the lowercase of the `comment` column, then materialize it (which can take a while - wait for it to materialize):
```sql
ALTER TABLE hackernews
ADD INDEX comment_lowercase(lower(comment)) TYPE inverted;
ADD INDEX comment_lowercase(lower(comment)) TYPE full_text;
ALTER TABLE hackernews MATERIALIZE INDEX comment_lowercase;
```
@ -204,9 +204,9 @@ WHERE hasToken(lower(comment), 'avx') AND hasToken(lower(comment), 'sve');
```
:::note
Unlike other secondary indices, inverted indexes (for now) map to row numbers (row ids) instead of granule ids. The reason for this design
Unlike other secondary indices, full-text indexes (for now) map to row numbers (row ids) instead of granule ids. The reason for this design
is performance. In practice, users often search for multiple terms at once. For example, filter predicate `WHERE s LIKE '%little%' OR s LIKE
'%big%'` can be evaluated directly using an inverted index by forming the union of the row id lists for terms "little" and "big". This also
'%big%'` can be evaluated directly using a full-text index by forming the union of the row id lists for terms "little" and "big". This also
means that the parameter `GRANULARITY` supplied to index creation has no meaning (it may be removed from the syntax in the future).
:::

View File

@ -287,9 +287,9 @@ The number of columns in the primary key is not explicitly limited. Depending on
A long primary key will negatively affect the insert performance and memory consumption, but extra columns in the primary key do not affect ClickHouse performance during `SELECT` queries.
You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max_insert_threads = 1](/docs/en/operations/settings/settings.md/#settings-max-insert-threads).
You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of insertion. If you want to preserve the data order when inserting data with `INSERT ... SELECT` queries, set [max_insert_threads = 1](/docs/en/operations/settings/settings.md/#max-insert-threads).
To select data in the initial order, use [single-threaded](/docs/en/operations/settings/settings.md/#settings-max_threads) `SELECT` queries.
To select data in the initial order, use [single-threaded](/docs/en/operations/settings/settings.md/#max_threads) `SELECT` queries.
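A minimal sketch of these recommendations, using hypothetical table names:

```sql
-- Table without a primary key: rows are kept in insertion order.
CREATE TABLE events_no_pk
(
    `ts` DateTime,
    `message` String
)
ENGINE = MergeTree
ORDER BY tuple();

-- Preserve the source order while copying (the source table is illustrative).
SET max_insert_threads = 1;
INSERT INTO events_no_pk SELECT ts, message FROM events_src;

-- Read back in the stored order with a single-threaded SELECT.
SELECT * FROM events_no_pk SETTINGS max_threads = 1;
```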
### Choosing a Primary Key that Differs from the Sorting Key {#choosing-a-primary-key-that-differs-from-the-sorting-key}
@ -344,7 +344,7 @@ In the example below, the index cant be used.
SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%'
```
To check whether ClickHouse can use the index when running a query, use the settings [force_index_by_date](/docs/en/operations/settings/settings.md/#settings-force_index_by_date) and [force_primary_key](/docs/en/operations/settings/settings.md/#force-primary-key).
To check whether ClickHouse can use the index when running a query, use the settings [force_index_by_date](/docs/en/operations/settings/settings.md/#force_index_by_date) and [force_primary_key](/docs/en/operations/settings/settings.md/#force-primary-key).
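As a hedged sketch, both settings can be enabled for a session so that a query which cannot use the index fails instead of silently scanning everything (the table and column names are the same placeholders used above):

```sql
SET force_index_by_date = 1;
SET force_primary_key = 1;

-- Throws an exception if the condition does not allow index usage.
SELECT count() FROM table WHERE CounterID = 34;
```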
The key for partitioning by month allows reading only those data blocks which contain dates from the proper range. In this case, the data block may contain data for many dates (up to an entire month). Within a block, data is sorted by primary key, which might not contain the date as the first column. Because of this, using a query with only a date condition that does not specify the primary key prefix will cause more data to be read than for a single date.
@ -769,6 +769,7 @@ In addition to local block devices, ClickHouse supports these storage types:
- [`web` for read-only from web](#web-storage)
- [`cache` for local caching](/docs/en/operations/storing-data.md/#using-local-cache)
- [`s3_plain` for backups to S3](/docs/en/operations/backup#backuprestore-using-an-s3-disk)
- [`s3_plain_rewritable` for immutable, non-replicated tables in S3](/docs/en/operations/storing-data.md#s3-plain-rewritable-storage)
## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}

View File

@ -113,7 +113,7 @@ You can specify any existing ZooKeeper cluster and the system will use a directo
If ZooKeeper is not set in the config file, you cant create replicated tables, and any existing replicated tables will be read-only.
ZooKeeper is not used in `SELECT` queries because replication does not affect the performance of `SELECT` and queries run just as fast as they do for non-replicated tables. When querying distributed replicated tables, ClickHouse behavior is controlled by the settings [max_replica_delay_for_distributed_queries](/docs/en/operations/settings/settings.md/#settings-max_replica_delay_for_distributed_queries) and [fallback_to_stale_replicas_for_distributed_queries](/docs/en/operations/settings/settings.md/#settings-fallback_to_stale_replicas_for_distributed_queries).
ZooKeeper is not used in `SELECT` queries because replication does not affect the performance of `SELECT` and queries run just as fast as they do for non-replicated tables. When querying distributed replicated tables, ClickHouse behavior is controlled by the settings [max_replica_delay_for_distributed_queries](/docs/en/operations/settings/settings.md/#max_replica_delay_for_distributed_queries) and [fallback_to_stale_replicas_for_distributed_queries](/docs/en/operations/settings/settings.md/#fallback_to_stale_replicas_for_distributed_queries).
For each `INSERT` query, approximately ten entries are added to ZooKeeper through several transactions. (To be more precise, this is for each inserted block of data; an INSERT query contains one block or one block per `max_insert_block_size = 1048576` rows.) This leads to slightly longer latencies for `INSERT` compared to non-replicated tables. But if you follow the recommendations to insert data in batches of no more than one `INSERT` per second, it does not create any problems. The entire ClickHouse cluster used for coordinating one ZooKeeper cluster has a total of several hundred `INSERTs` per second. The throughput on data inserts (the number of rows per second) is just as high as for non-replicated data.
@ -304,10 +304,10 @@ We use the term `MergeTree` to refer to all table engines in the `MergeTree fami
If you had a `MergeTree` table that was manually replicated, you can convert it to a replicated table. You might need to do this if you have already collected a large amount of data in a `MergeTree` table and now you want to enable replication.
`MergeTree` table can be automatically converted on server restart if `convert_to_replicated` flag is set at the table's data directory (`/var/lib/clickhouse/store/xxx/xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy/` for `Atomic` database).
A `MergeTree` table can be automatically converted on server restart if the `convert_to_replicated` flag is set in the table's data directory (`/store/xxx/xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy/` for an `Atomic` database).
Create an empty `convert_to_replicated` file and the table will be loaded as replicated on the next server restart.
This query can be used to get the table's data path.
This query can be used to get the table's data path. If the table has multiple data paths, use the first one.
```sql
SELECT data_paths FROM system.tables WHERE table = 'table_name' AND database = 'database_name';

View File

@ -83,7 +83,7 @@ When creating a table, the following settings are applied:
#### join_any_take_last_row
[join_any_take_last_row](/docs/en/operations/settings/settings.md/#settings-join_any_take_last_row)
[join_any_take_last_row](/docs/en/operations/settings/settings.md/#join_any_take_last_row)
#### join_use_nulls
#### persistent

View File

@ -10,7 +10,8 @@ The RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.
1. Go to the download page [https://recipenlg.cs.put.poznan.pl/dataset](https://recipenlg.cs.put.poznan.pl/dataset).
1. Accept Terms and Conditions and download zip file.
1. Unpack the zip file with `unzip`. You will get the `full_dataset.csv` file.
1. Optionally, validate the zip file with `md5sum dataset.zip`; the checksum should equal `3a168dfd0912bb034225619b3586ce76`.
1. Unpack the zip file with `unzip dataset.zip`. You will get the `full_dataset.csv` file in the `dataset` directory.
## Create a Table
@ -72,7 +73,7 @@ Result:
``` text
┌─count()─┐
│ 2231141
│ 2231142
└─────────┘
```
@ -115,7 +116,7 @@ Result:
│ egg │ 160507 │
│ baking powder │ 148277 │
│ lemon juice │ 146414 │
│ Salt │ 122557
│ Salt │ 122558
│ cinnamon │ 117927 │
│ sour cream │ 116682 │
│ cream cheese │ 114423 │

View File

@ -327,7 +327,9 @@ Use buffering to avoid situations where a query processing error occurred after
## Setting a role with query parameters {#setting-role-with-query-parameters}
In certain scenarios, it might be required to set the granted role first, before executing the statement itself.
This is a new feature added in ClickHouse 24.4.
In specific scenarios, setting the granted role first might be required before executing the statement itself.
However, it is not possible to send `SET ROLE` and the statement together, as multi-statements are not allowed:
```
@ -346,7 +348,7 @@ To overcome this limitation, you could use the `role` query parameter instead:
curl -sS "http://localhost:8123?role=my_role" --data-binary "SELECT * FROM my_table;"
```
This will be an equivalent of executing `SET ROLE my_role` before the statement.
This will be the equivalent of executing `SET ROLE my_role` before the statement.
Additionally, it is possible to specify multiple `role` query parameters:

View File

@ -172,7 +172,7 @@ Features:
### ClickVisual {#clickvisual}
[ClickVisual](https://clickvisual.gocn.vip/) ClickVisual is a lightweight open source log query, analysis and alarm visualization platform.
[ClickVisual](https://clickvisual.net/) is a lightweight, open-source log query, analysis and alarm visualization platform.
Features:

View File

@ -76,7 +76,7 @@ ClickHouse, Inc. does **not** maintain the tools and libraries listed below and
- [clickhouse-maxmind-geoip](https://github.com/AlexeyKupershtokh/clickhouse-maxmind-geoip)
- AutoML
- [MindsDB](https://mindsdb.com/)
- [MindsDB](https://github.com/mindsdb/mindsdb) - Predictive AI layer for ClickHouse database.
- [MindsDB](https://github.com/mindsdb/mindsdb) - Integrates with ClickHouse, making data from ClickHouse accessible to a diverse range of AI/ML models.
## Programming Language Ecosystems {#programming-language-ecosystems}

View File

@ -7,6 +7,8 @@ toc_max_heading_level: 2
# Core Settings
All settings below are also available in the table [system.settings](/docs/en/operations/system-tables/settings).
## additional_table_filters
An additional filter expression that is applied after reading from the specified table.
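A brief illustrative use, assuming a table named `hits` with an `EventDate` column:

```sql
-- The extra predicate is applied to `hits` after reading.
SELECT count()
FROM hits
SETTINGS additional_table_filters = {'hits': 'EventDate >= today() - 7'};
```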
@ -3931,19 +3933,6 @@ For example, `avg(if(cond, col, null))` can be rewritten to `avgOrNullIf(cond, c
Supported only with experimental analyzer (`allow_experimental_analyzer = 1`).
:::
## allow_experimental_database_replicated {#allow_experimental_database_replicated}
Enables to create databases with [Replicated](../../engines/database-engines/replicated.md) engine.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: `0`.
Cloud default value: `1`.
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
Sets how long the initial DDL query should wait for the Replicated database to process previous DDL queue entries, in seconds.
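An illustrative session-level override (the value is a placeholder):

```sql
SET database_replicated_initial_query_timeout_sec = 600;
```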
@ -4384,17 +4373,6 @@ Possible values:
Default value: `ignore`.
## first_day_of_week
The first day of the week assumed by [`toStartOfInterval`](../../sql-reference/functions/date-time-functions.md#toStartOfInterval) function when using weeks as unit.
Possible values:
- Monday - Week starts on Monday
- Sunday - Week starts on Sunday
Default value: 'Monday'.
## optimize_move_to_prewhere {#optimize_move_to_prewhere}
Enables or disables automatic [PREWHERE](../../sql-reference/statements/select/prewhere.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries.
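For example, the optimization can be disabled for a single query (the table and column are assumptions):

```sql
SELECT count()
FROM hits
WHERE URL LIKE '%clickhouse%'
SETTINGS optimize_move_to_prewhere = 0;
```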

View File

@ -28,7 +28,7 @@ Starting from 24.1 clickhouse version, it is possible to use a new configuration
It requires specifying:
1. `type` equal to `object_storage`
2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs`, `local_blob_storage` (or just `local` from `24.3`), `web`.
Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web`.
Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web` and, starting from `24.4`, `plain_rewritable`.
Usage of the `plain` metadata type is described in the [plain storage section](/docs/en/operations/storing-data.md/#storing-data-on-webserver), the `web` metadata type can be used only with the `web` object storage type, and the `local` metadata type stores metadata files locally (each metadata file contains a mapping to files in object storage and some additional meta information about them).
E.g. configuration option
@ -341,6 +341,36 @@ Configuration:
</s3_plain>
```
### Using S3 Plain Rewritable Storage {#s3-plain-rewritable-storage}
A new disk type `s3_plain_rewritable` was introduced in `24.4`.
Similar to the `s3_plain` disk type, it does not require additional storage for metadata files; instead, metadata is stored in S3.
Unlike the `s3_plain` disk type, `s3_plain_rewritable` allows executing merges and supports INSERT operations.
[Mutations](/docs/en/sql-reference/statements/alter#mutations) and replication of tables are not supported.
A use case for this disk type is non-replicated `MergeTree` tables. Although the `s3` disk type is suitable for non-replicated
MergeTree tables, you may opt for the `s3_plain_rewritable` disk type if you do not require local metadata for the table and are
willing to accept a limited set of operations. This could be useful, for example, for system tables.
Configuration:
``` xml
<s3_plain_rewritable>
<type>s3_plain_rewritable</type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_environment_credentials>1</use_environment_credentials>
</s3_plain_rewritable>
```
is equivalent to
``` xml
<s3_plain_rewritable>
<type>object_storage</type>
<object_storage_type>s3</object_storage_type>
<metadata_type>plain_rewritable</metadata_type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
<use_environment_credentials>1</use_environment_credentials>
</s3_plain_rewritable>
```
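A hedged sketch of a non-replicated table placed on such a disk; the `disk` setting refers to the disk name declared above:

```sql
CREATE TABLE events_plain_rewritable
(
    `ts` DateTime,
    `value` UInt64
)
ENGINE = MergeTree
ORDER BY ts
SETTINGS disk = 's3_plain_rewritable';
```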
### Using Azure Blob Storage {#azure-blob-storage}
`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.

View File

@ -30,6 +30,16 @@ Columns:
- `source_file` (LowCardinality(String)) — Source file from which the logging was done.
- `source_line` (UInt64) — Source line from which the logging was done.
- `message_format_string` (LowCardinality(String)) — A format string that was used to format the message.
- `value1` (String) - Argument 1 that was used to format the message.
- `value2` (String) - Argument 2 that was used to format the message.
- `value3` (String) - Argument 3 that was used to format the message.
- `value4` (String) - Argument 4 that was used to format the message.
- `value5` (String) - Argument 5 that was used to format the message.
- `value6` (String) - Argument 6 that was used to format the message.
- `value7` (String) - Argument 7 that was used to format the message.
- `value8` (String) - Argument 8 that was used to format the message.
- `value9` (String) - Argument 9 that was used to format the message.
- `value10` (String) - Argument 10 that was used to format the message.
**Example**
@ -55,4 +65,14 @@ revision: 54440
source_file: /ClickHouse/src/Interpreters/DNSCacheUpdater.cpp; void DB::DNSCacheUpdater::start()
source_line: 45
message_format_string: Update period {} seconds
value1: 15
value2:
value3:
value4:
value5:
value6:
value7:
value8:
value9:
value10:
```

View File

@ -111,7 +111,7 @@ On newer Linux kernels transparent huge pages are alright.
$ echo 'madvise' | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
```
If you want to modify the transparent huge pages setting permanently, editing the `/etc/default/grub` to add the `transparent_hugepage=never` to the `GRUB_CMDLINE_LINUX_DEFAULT` option:
If you want to modify the transparent huge pages setting permanently, edit `/etc/default/grub` and add `transparent_hugepage=madvise` to the `GRUB_CMDLINE_LINUX_DEFAULT` option:
```bash
$ GRUB_CMDLINE_LINUX_DEFAULT="transparent_hugepage=madvise ..."

View File

@ -505,9 +505,117 @@ HAVING uniqUpTo(4)(UserID) >= 5
`uniqUpTo(4)(UserID)` calculates the number of unique `UserID` values for each `SearchPhrase`, but it only counts up to 4 unique values. If there are more than 4 unique `UserID` values for a `SearchPhrase`, the function returns 5 (4 + 1). The `HAVING` clause then filters out the `SearchPhrase` values for which the number of unique `UserID` values is less than 5. This will give you a list of search keywords that were used by at least 5 unique users.
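A compact sketch of the full query being described (the `hits` table and its columns are assumptions):

```sql
SELECT SearchPhrase
FROM hits
GROUP BY SearchPhrase
HAVING uniqUpTo(4)(UserID) >= 5;
```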
## sumMapFiltered(keys_to_keep)(keys, values)
## sumMapFiltered
Same behavior as [sumMap](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) except that an array of keys is passed as a parameter. This can be especially useful when working with a high cardinality of keys.
This function behaves the same as [sumMap](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) except that it also accepts an array of keys to filter with as a parameter. This can be especially useful when working with a high cardinality of keys.
**Syntax**
`sumMapFiltered(keys_to_keep)(keys, values)`
**Parameters**
- `keys_to_keep`: [Array](../data-types/array.md) of keys to filter with.
- `keys`: [Array](../data-types/array.md) of keys.
- `values`: [Array](../data-types/array.md) of values.
**Returned Value**
- Returns a tuple of two arrays: keys in sorted order, and values summed for the corresponding keys.
**Example**
Query:
```sql
CREATE TABLE sum_map
(
`date` Date,
`timeslot` DateTime,
`statusMap` Nested(status UInt16, requests UInt64)
)
ENGINE = Log;
INSERT INTO sum_map VALUES
('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]),
('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]),
('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]),
('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]);
```
```sql
SELECT sumMapFiltered([1, 4, 8])(statusMap.status, statusMap.requests) FROM sum_map;
```
Result:
```response
┌─sumMapFiltered([1, 4, 8])(statusMap.status, statusMap.requests)─┐
1. │ ([1,4,8],[10,20,10]) │
└─────────────────────────────────────────────────────────────────┘
```
## sumMapFilteredWithOverflow
This function behaves the same as [sumMap](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) except that it also accepts an array of keys to filter with as a parameter. This can be especially useful when working with a high cardinality of keys. It differs from the [sumMapFiltered](#summapfiltered) function in that it does summation with overflow - i.e. returns the same data type for the summation as the argument data type.
**Syntax**
`sumMapFilteredWithOverflow(keys_to_keep)(keys, values)`
**Parameters**
- `keys_to_keep`: [Array](../data-types/array.md) of keys to filter with.
- `keys`: [Array](../data-types/array.md) of keys.
- `values`: [Array](../data-types/array.md) of values.
**Returned Value**
- Returns a tuple of two arrays: keys in sorted order, and values summed for the corresponding keys.
**Example**
In this example we create a table `sum_map`, insert some data into it, and then use both `sumMapFilteredWithOverflow` and `sumMapFiltered` together with the `toTypeName` function to compare the results. Since `requests` is of type `UInt8` in the created table, `sumMapFiltered` promotes the type of the summed values to `UInt64` to avoid overflow, whereas `sumMapFilteredWithOverflow` keeps the type as `UInt8`, which is not large enough to store the result, i.e. overflow occurs.
Query:
```sql
CREATE TABLE sum_map
(
`date` Date,
`timeslot` DateTime,
`statusMap` Nested(status UInt8, requests UInt8)
)
ENGINE = Log;
INSERT INTO sum_map VALUES
('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]),
('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]),
('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]),
('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]);
```
```sql
SELECT sumMapFilteredWithOverflow([1, 4, 8])(statusMap.status, statusMap.requests) as summap_overflow, toTypeName(summap_overflow) FROM sum_map;
```
```sql
SELECT sumMapFiltered([1, 4, 8])(statusMap.status, statusMap.requests) as summap, toTypeName(summap) FROM sum_map;
```
Result:
```response
┌─summap_overflow──────┬─toTypeName(summap_overflow)───────┐
1. │ ([1,4,8],[10,20,10]) │ Tuple(Array(UInt8), Array(UInt8)) │
└──────────────────────┴───────────────────────────────────┘
```
```response
┌─summap───────────────┬─toTypeName(summap)─────────────────┐
1. │ ([1,4,8],[10,20,10]) │ Tuple(Array(UInt8), Array(UInt64)) │
└──────────────────────┴────────────────────────────────────┘
```
## sequenceNextNode

View File

@ -16,7 +16,9 @@ Standard aggregate functions:
- [avg](/docs/en/sql-reference/aggregate-functions/reference/avg.md)
- [any](/docs/en/sql-reference/aggregate-functions/reference/any.md)
- [stddevPop](/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md)
- [stddevPopStable](/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md)
- [stddevSamp](/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md)
- [stddevSampStable](/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md)
- [varPop](/docs/en/sql-reference/aggregate-functions/reference/varpop.md)
- [varSamp](/docs/en/sql-reference/aggregate-functions/reference/varsamp.md)
- [corr](./corr.md)
@ -65,6 +67,9 @@ ClickHouse-specific aggregate functions:
- [groupBitmapXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md)
- [sumWithOverflow](/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md)
- [sumMap](/docs/en/sql-reference/aggregate-functions/reference/summap.md)
- [sumMapWithOverflow](/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md)
- [sumMapFiltered](/docs/en/sql-reference/aggregate-functions/parametric-functions.md/#summapfiltered)
- [sumMapFilteredWithOverflow](/docs/en/sql-reference/aggregate-functions/parametric-functions.md/#summapfilteredwithoverflow)
- [minMap](/docs/en/sql-reference/aggregate-functions/reference/minmap.md)
- [maxMap](/docs/en/sql-reference/aggregate-functions/reference/maxmap.md)
- [skewSamp](/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md)

View File

@ -7,10 +7,50 @@ sidebar_position: 30
The result is equal to the square root of [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md).
Alias:
- `STD`
- `STDDEV_POP`
Aliases: `STD`, `STDDEV_POP`.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works slower but provides a lower computational error.
:::
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`stddevPopStable`](../reference/stddevpopstable.md) function. It works slower but provides a lower computational error.
:::
**Syntax**
```sql
stddevPop(x)
```
**Parameters**
- `x`: Population of values to find the standard deviation of. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md).
**Returned value**
Square root of standard deviation of `x`. [Float64](../../data-types/float.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS test_data;
CREATE TABLE test_data
(
population UInt8,
)
ENGINE = Log;
INSERT INTO test_data VALUES (3),(3),(3),(4),(4),(5),(5),(7),(11),(15);
SELECT
stddevPop(population) AS stddev
FROM test_data;
```
Result:
```response
┌────────────stddev─┐
│ 3.794733192202055 │
└───────────────────┘
```

View File

@ -0,0 +1,49 @@
---
slug: /en/sql-reference/aggregate-functions/reference/stddevpopstable
sidebar_position: 30
---
# stddevPopStable
The result is equal to the square root of [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md). Unlike [`stddevPop`](../reference/stddevpop.md), this function uses a numerically stable algorithm. It works slower but provides a lower computational error.
**Syntax**
```sql
stddevPopStable(x)
```
**Parameters**
- `x`: Population of values to find the standard deviation of. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md).
**Returned value**
Square root of standard deviation of `x`. [Float64](../../data-types/float.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS test_data;
CREATE TABLE test_data
(
population Float64,
)
ENGINE = Log;
INSERT INTO test_data SELECT randUniform(5.5, 10) FROM numbers(1000000);
SELECT
stddevPopStable(population) AS stddev
FROM test_data;
```
Result:
```response
┌─────────────stddev─┐
│ 1.2999977786592576 │
└────────────────────┘
```

View File

@ -10,5 +10,46 @@ The result is equal to the square root of [varSamp](../../../sql-reference/aggre
Alias: `STDDEV_SAMP`.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works slower but provides a lower computational error.
:::
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`stddevSampStable`](../reference/stddevsampstable.md) function. It works slower but provides a lower computational error.
:::
**Syntax**
```sql
stddevSamp(x)
```
**Parameters**
- `x`: Values for which to find the square root of sample variance. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md).
**Returned value**
Square root of sample variance of `x`. [Float64](../../data-types/float.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS test_data;
CREATE TABLE test_data
(
population UInt8,
)
ENGINE = Log;
INSERT INTO test_data VALUES (3),(3),(3),(4),(4),(5),(5),(7),(11),(15);
SELECT
stddevSamp(population)
FROM test_data;
```
Result:
```response
┌─stddevSamp(population)─┐
│ 4 │
└────────────────────────┘
```

View File

@ -0,0 +1,49 @@
---
slug: /en/sql-reference/aggregate-functions/reference/stddevsampstable
sidebar_position: 31
---
# stddevSampStable
The result is equal to the square root of [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md). Unlike [`stddevSamp`](../reference/stddevsamp.md), this function uses a numerically stable algorithm. It works slower but provides a lower computational error.
**Syntax**
```sql
stddevSampStable(x)
```
**Parameters**
- `x`: Values for which to find the square root of sample variance. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md).
**Returned value**
Square root of sample variance of `x`. [Float64](../../data-types/float.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS test_data;
CREATE TABLE test_data
(
population UInt8,
)
ENGINE = Log;
INSERT INTO test_data VALUES (3),(3),(3),(4),(4),(5),(5),(7),(11),(15);
SELECT
stddevSampStable(population)
FROM test_data;
```
Result:
```response
┌─stddevSampStable(population)─┐
│ 4 │
└──────────────────────────────┘
```

View File

@ -7,6 +7,56 @@ sidebar_position: 4
Calculates the sum. Only works for numbers.
**Syntax**
```sql
sum(num)
```
**Parameters**
- `num`: Column of numeric values. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md).
**Returned value**
- The sum of the values. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md).
**Example**
First we create a table `employees` and insert some fictional employee data into it.
Query:
```sql
CREATE TABLE employees
(
`id` UInt32,
`name` String,
`salary` UInt32
)
ENGINE = Log
```
```sql
INSERT INTO employees VALUES
(87432, 'John Smith', 45680),
(59018, 'Jane Smith', 72350),
(20376, 'Ivan Ivanovich', 58900),
(71245, 'Anastasia Ivanovna', 89210);
```
We query for the total amount of the employee salaries using the `sum` function.
Query:
```sql
SELECT sum(salary) FROM employees;
```
Result:
```response
┌─sum(salary)─┐
1. │ 266140 │
└─────────────┘
```

View File

@ -5,21 +5,35 @@ sidebar_position: 141
# sumMap
Syntax: `sumMap(key <Array>, value <Array>)` [Array type](../../data-types/array.md) or `sumMap(Tuple(key <Array>, value <Array>))` [Tuple type](../../data-types/tuple.md).
Totals a `value` array according to the keys specified in the `key` array. Returns a tuple of two arrays: keys in sorted order, and values summed for the corresponding keys without overflow.
Arguments:
**Syntax**
- `sumMap(key <Array>, value <Array>)` [Array type](../../data-types/array.md).
- `sumMap(Tuple(key <Array>, value <Array>))` [Tuple type](../../data-types/tuple.md).
Alias: `sumMappedArrays`.
Totals the `value` array according to the keys specified in the `key` array.
**Arguments**
Passing tuple of keys and values arrays is a synonym to passing two arrays of keys and values.
- `key`: [Array](../../data-types/array.md) of keys.
- `value`: [Array](../../data-types/array.md) of values.
Passing a tuple of key and value arrays is a synonym for passing an array of keys and an array of values separately.
:::note
The number of elements in `key` and `value` must be the same for each row that is totaled.
:::
Returns a tuple of two arrays: keys in sorted order, and values summed for the corresponding keys.
**Returned Value**
Example:
- Returns a tuple of two arrays: keys in sorted order, and values summed for the corresponding keys.
**Example**
First we create a table called `sum_map`, and insert some data into it. Arrays of keys and values are stored separately as a column called `statusMap` of [Nested](../../data-types/nested-data-structures/index.md) type, and together as a column called `statusMapTuple` of [tuple](../../data-types/tuple.md) type to illustrate the use of the two different syntaxes of this function described above.
Query:
``` sql
CREATE TABLE sum_map(
@ -31,13 +45,20 @@ CREATE TABLE sum_map(
),
statusMapTuple Tuple(Array(Int32), Array(Int32))
) ENGINE = Log;
```
```sql
INSERT INTO sum_map VALUES
('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10], ([1, 2, 3], [10, 10, 10])),
('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10], ([3, 4, 5], [10, 10, 10])),
('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10], ([4, 5, 6], [10, 10, 10])),
('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10], ([6, 7, 8], [10, 10, 10]));
```
Next, we query the table using the `sumMap` function, making use of both array and tuple type syntaxes:
Query:
``` sql
SELECT
timeslot,
sumMap(statusMap.status, statusMap.requests),
@ -46,6 +67,8 @@ FROM sum_map
GROUP BY timeslot
```
Result:
``` text
┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┬─sumMap(statusMapTuple)─────────┐
│ 2000-01-01 00:00:00 │ ([1,2,3,4,5],[10,10,20,10,10]) │ ([1,2,3,4,5],[10,10,20,10,10]) │
@ -54,5 +77,6 @@ GROUP BY timeslot
```
**See Also**
- [-Map combinator for Map datatype](../combinators.md#-map)
- [Map combinator for Map datatype](../combinators.md#-map)
- [sumMapWithOverflow](../reference/summapwithoverflow.md)

View File

@ -0,0 +1,92 @@
---
slug: /en/sql-reference/aggregate-functions/reference/summapwithoverflow
sidebar_position: 141
---
# sumMapWithOverflow
Totals a `value` array according to the keys specified in the `key` array. Returns a tuple of two arrays: keys in sorted order, and values summed for the corresponding keys.
It differs from the [sumMap](../reference/summap.md) function in that it does summation with overflow - i.e. returns the same data type for the summation as the argument data type.
**Syntax**
- `sumMapWithOverflow(key <Array>, value <Array>)` [Array type](../../data-types/array.md).
- `sumMapWithOverflow(Tuple(key <Array>, value <Array>))` [Tuple type](../../data-types/tuple.md).
**Arguments**
- `key`: [Array](../../data-types/array.md) of keys.
- `value`: [Array](../../data-types/array.md) of values.
Passing a tuple of key and value arrays is a synonym for passing an array of keys and an array of values separately.
:::note
The number of elements in `key` and `value` must be the same for each row that is totaled.
:::
**Returned Value**
- Returns a tuple of two arrays: keys in sorted order, and values summed for the corresponding keys.
**Example**
First we create a table called `sum_map`, and insert some data into it. Arrays of keys and values are stored separately as a column called `statusMap` of [Nested](../../data-types/nested-data-structures/index.md) type, and together as a column called `statusMapTuple` of [tuple](../../data-types/tuple.md) type to illustrate the use of the two different syntaxes of this function described above.
Query:
``` sql
CREATE TABLE sum_map(
date Date,
timeslot DateTime,
statusMap Nested(
status UInt8,
requests UInt8
),
statusMapTuple Tuple(Array(Int8), Array(Int8))
) ENGINE = Log;
```
```sql
INSERT INTO sum_map VALUES
('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10], ([1, 2, 3], [10, 10, 10])),
('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10], ([3, 4, 5], [10, 10, 10])),
('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10], ([4, 5, 6], [10, 10, 10])),
('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10], ([6, 7, 8], [10, 10, 10]));
```
If we query the table using the `sumMap` and `sumMapWithOverflow` functions with the array type syntax, together with the `toTypeName` function, then we can see that
for the `sumMapWithOverflow` function, the data type of the summed values array is the same as the argument type, both `UInt8` (i.e. summation was done with overflow). For `sumMap`, the data type of the summed values array has changed from `UInt8` to `UInt64` such that overflow does not occur.
Query:
``` sql
SELECT
timeslot,
toTypeName(sumMap(statusMap.status, statusMap.requests)),
toTypeName(sumMapWithOverflow(statusMap.status, statusMap.requests)),
FROM sum_map
GROUP BY timeslot
```
Equivalently, we could have used the tuple syntax for the same result.
``` sql
SELECT
timeslot,
toTypeName(sumMap(statusMapTuple)),
toTypeName(sumMapWithOverflow(statusMapTuple)),
FROM sum_map
GROUP BY timeslot
```
Result:
``` text
┌────────────timeslot─┬─toTypeName(sumMap(statusMap.status, statusMap.requests))─┬─toTypeName(sumMapWithOverflow(statusMap.status, statusMap.requests))─┐
1. │ 2000-01-01 00:01:00 │ Tuple(Array(UInt8), Array(UInt64)) │ Tuple(Array(UInt8), Array(UInt8)) │
2. │ 2000-01-01 00:00:00 │ Tuple(Array(UInt8), Array(UInt64)) │ Tuple(Array(UInt8), Array(UInt8)) │
└─────────────────────┴──────────────────────────────────────────────────────────┴──────────────────────────────────────────────────────────────────────┘
```
**See Also**
- [sumMap](../reference/summap.md)

View File

@ -8,3 +8,64 @@ sidebar_position: 140
Computes the sum of the numbers, using the same data type for the result as for the input parameters. If the sum exceeds the maximum value for this data type, it is calculated with overflow.
Only works for numbers.
**Syntax**
```sql
sumWithOverflow(num)
```
**Parameters**
- `num`: Column of numeric values. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md).
**Returned value**
- The sum of the values. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md).
**Example**
First we create a table `employees` and insert some fictional employee data into it. For this example we declare `monthly_salary` as `UInt16` so that a sum of these values may produce an overflow.
Query:
```sql
CREATE TABLE employees
(
`id` UInt32,
`name` String,
`monthly_salary` UInt16
)
ENGINE = Log
```
```sql
SELECT
sum(monthly_salary) AS no_overflow,
sumWithOverflow(monthly_salary) AS overflow,
toTypeName(no_overflow),
toTypeName(overflow)
FROM employees
```
We query for the total amount of the employee salaries using the `sum` and `sumWithOverflow` functions and show their types using the `toTypeName` function.
For the `sum` function the resulting type is `UInt64`, big enough to contain the sum, whilst for `sumWithOverflow` the resulting type remains as `UInt16`.
Query:
```sql
SELECT
sum(monthly_salary) AS no_overflow,
sumWithOverflow(monthly_salary) AS overflow,
toTypeName(no_overflow),
toTypeName(overflow),
FROM employees;
```
Result:
```response
┌─no_overflow─┬─overflow─┬─toTypeName(no_overflow)─┬─toTypeName(overflow)─┐
1. │ 118700 │ 53164 │ UInt64 │ UInt16 │
└─────────────┴──────────┴─────────────────────────┴──────────────────────┘
```

View File

@ -31,6 +31,8 @@ This function uses a numerically unstable algorithm. If you need numerical stabi
**Example**
Query:
```sql
DROP TABLE IF EXISTS test_data;
CREATE TABLE test_data
@ -47,6 +49,8 @@ SELECT
FROM test_data;
```
Result:
```response
3
```
@ -94,6 +98,8 @@ SELECT
FROM test_data;
```
Result:
```response
0.5999999999999999
```

View File

@ -1052,7 +1052,7 @@ toStartOfWeek(t[, mode[, timezone]])
**Arguments**
- `t` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
- `mode` - determines the first day of the week as described in the [toWeek()](date-time-functions#toweek) function. Default: 0
- `mode` - determines the first day of the week as described in the [toWeek()](date-time-functions#toweek) function
- `timezone` - Optional parameter, it behaves like any other conversion function
**Returned value**
@ -1413,7 +1413,7 @@ toStartOfFifteenMinutes(toDateTime('2023-04-21 10:20:00')): 2023-04-21 10:15:00
toStartOfFifteenMinutes(toDateTime('2023-04-21 10:23:00')): 2023-04-21 10:15:00
```
## toStartOfInterval
## toStartOfInterval(date_or_date_with_time, INTERVAL x unit \[, time_zone\])
This function generalizes other `toStartOf*()` functions. For example,
- `toStartOfInterval(t, INTERVAL 1 year)` returns the same as `toStartOfYear(t)`,
@ -1440,8 +1440,6 @@ The calculation is performed relative to specific points in time:
(*) hour intervals are special: the calculation is always performed relative to 00:00:00 (midnight) of the current day. As a result, only
hour values between 1 and 23 are useful.
If unit `week` was specified, `toStartOfInterval` assumes by default that weeks start on Monday. You can change this behavior with setting [`first_day_of_week`](../../operations/settings/settings.md/#first-day-of-week)
**See Also**
- [date_trunc](#date_trunc)
@ -1675,7 +1673,7 @@ Like [fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](../.
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 nanosecond.
E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit.
For an alternative to `age`, see function `date_diff`.
For an alternative to `age`, see function `date\_diff`.
**Syntax**
@ -1749,9 +1747,9 @@ Result:
Returns the count of the specified `unit` boundaries crossed between the `startdate` and the `enddate`.
The difference is calculated using relative units, e.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for unit `day` (see [toRelativeDayNum](#torelativedaynum)), 1 month for unit `month` (see [toRelativeMonthNum](#torelativemonthnum)) and 1 year for unit `year` (see [toRelativeYearNum](#torelativeyearnum)).
If unit `week` was specified, `date_diff` assumes that weeks start on Monday. Note that this behavior is different from that of function `toWeek()` in which weeks start by default on Sunday.
If unit `week` was specified, `date\_diff` assumes that weeks start on Monday. Note that this behavior is different from that of function `toWeek()` in which weeks start by default on Sunday.
For an alternative to `date_diff`, see function `age`.
For an alternative to `date\_diff`, see function `age`.
**Syntax**
@ -1907,6 +1905,12 @@ If the addition results in a value outside the bounds of the data type, the resu
date_add(unit, value, date)
```
Alternative syntax:
``` sql
date_add(date, INTERVAL value unit)
```
Aliases: `dateAdd`, `DATE_ADD`.
**Arguments**
@ -1946,6 +1950,20 @@ Result:
└───────────────────────────────────────────────┘
```
```sql
SELECT date_add(toDate('2018-01-01'), INTERVAL 3 YEAR);
```
Result:
```text
┌─plus(toDate('2018-01-01'), toIntervalYear(3))─┐
│ 2021-01-01 │
└───────────────────────────────────────────────┘
```
**See Also**
- [addDate](#addDate)
@ -1962,6 +1980,13 @@ If the subtraction results in a value outside the bounds of the data type, the r
date_sub(unit, value, date)
```
Alternative syntax:
``` sql
date_sub(date, INTERVAL value unit)
```
Aliases: `dateSub`, `DATE_SUB`.
**Arguments**
@ -2002,6 +2027,19 @@ Result:
└────────────────────────────────────────────────┘
```
``` sql
SELECT date_sub(toDate('2018-01-01'), INTERVAL 3 YEAR);
```
Result:
``` text
┌─minus(toDate('2018-01-01'), toIntervalYear(3))─┐
│ 2015-01-01 │
└────────────────────────────────────────────────┘
```
**See Also**
- [subDate](#subDate)
@ -2845,7 +2883,7 @@ Result:
## fromUnixTimestamp
This function converts a Unix timestamp to a calendar date and a time of a day.
This function converts a Unix timestamp to a calendar date and a time of a day.
It can be called in two ways:
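A brief sketch of both call forms (the timestamp literal is illustrative; results are shown for a UTC server time zone and follow the session time zone in practice):

```sql
-- single argument: returns a DateTime
SELECT fromUnixTimestamp(423543535);              -- 1983-06-04 02:58:55
-- with a format string: behaves like formatDateTime and returns a String
SELECT fromUnixTimestamp(423543535, '%Y-%m-%d');  -- 1983-06-04
```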

View File

@ -4,48 +4,72 @@ sidebar_label: SVG
title: "Functions for Generating SVG images from Geo data"
---
## Syntax
## Svg
Returns a string of select SVG element tags from Geo data.
**Syntax**
``` sql
SVG(geometry,[style])
Svg(geometry,[style])
```
### Parameters
Aliases: `SVG`, `svg`
- `geometry` — Geo data
- `style` — Optional style name
**Parameters**
### Returned value
- `geometry` — Geo data. [Geo](../../data-types/geo).
- `style` — Optional style name. [String](../../data-types/string).
**Returned value**
- The SVG representation of the geometry:
- SVG circle
- SVG polygon
- SVG path
Type: String
Type: [String](../../data-types/string)
## Examples
**Examples**
**Circle**
Query:
### Circle
```sql
SELECT SVG((0., 0.))
```
Result:
```response
<circle cx="0" cy="0" r="5" style=""/>
```
### Polygon
**Polygon**
Query:
```sql
SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)])
```
Result:
```response
<polygon points="0,0 0,10 10,10 10,0 0,0" style=""/>
```
### Path
**Path**
Query:
```sql
SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]])
```
Result:
```response
<g fill-rule="evenodd"><path d="M 0,0 L 0,10 L 10,10 L 10,0 L 0,0M 4,4 L 5,4 L 5,5 L 4,5 L 4,4 z " style=""/></g>
```

View File

@ -8,7 +8,7 @@ sidebar_label: Mathematical
## e
Returns e ([Euler's constant](https://en.wikipedia.org/wiki/Euler%27s_constant))
Returns e ([Euler's constant](https://en.wikipedia.org/wiki/Euler%27s_constant)).
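For example, the constant can be queried directly:

```sql
SELECT e();  -- 2.718281828459045
```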
**Syntax**
@ -45,7 +45,7 @@ exp(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -65,7 +65,7 @@ Alias: `ln(x)`
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -83,7 +83,7 @@ exp2(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -111,7 +111,7 @@ log2(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -129,7 +129,7 @@ exp10(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -157,7 +157,7 @@ log10(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -173,7 +173,7 @@ sqrt(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -189,7 +189,7 @@ cbrt(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -207,7 +207,7 @@ erf(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -239,7 +239,7 @@ erfc(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -257,7 +257,7 @@ lgamma(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -275,7 +275,7 @@ gamma(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -293,7 +293,7 @@ sin(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -323,7 +323,7 @@ cos(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -341,7 +341,7 @@ tan(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -359,7 +359,7 @@ asin(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -377,7 +377,7 @@ acos(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -395,7 +395,7 @@ atan(x)
**Arguments**
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md)
- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -434,7 +434,7 @@ cosh(x)
**Arguments**
- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -468,7 +468,7 @@ acosh(x)
**Arguments**
- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -502,7 +502,7 @@ sinh(x)
**Arguments**
- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -536,7 +536,7 @@ asinh(x)
**Arguments**
- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -569,13 +569,13 @@ tanh(x)
**Arguments**
- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
- Values from the interval: `-1 < tanh(x) < 1`.
Type: [Float64](../../sql-reference/data-types/float.md#float32-float64).
Type: [Float*](../../sql-reference/data-types/float.md#float32-float64).
**Example**
@ -601,7 +601,7 @@ atanh(x)
**Arguments**
- `x` — Hyperbolic tangent of angle. Values from the interval: `1 < x < 1`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `x` — Hyperbolic tangent of angle. Values from the interval: `1 < x < 1`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -635,8 +635,8 @@ atan2(y, x)
**Arguments**
- `y` — y-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `x` — x-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `y` — y-coordinate of the point through which the ray passes. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md).
- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md).
**Returned value**
@ -670,8 +670,8 @@ hypot(x, y)
**Arguments**
- `x` — The first cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `y` — The second cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md).
- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md).
**Returned value**
@ -705,7 +705,7 @@ log1p(x)
**Arguments**
- `x` — Values from the interval: `-1 < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `x` — Values from the interval: `-1 < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -747,6 +747,8 @@ sign(x)
- 0 for `x = 0`
- 1 for `x > 0`
Type: [Int8](../../sql-reference/data-types/int-uint.md).
**Examples**
Sign for the zero value:
@ -803,7 +805,7 @@ degrees(x)
**Arguments**
- `x` — Input in radians. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `x` — Input in radians. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -837,7 +839,7 @@ radians(x)
**Arguments**
- `x` — Input in degrees. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `x` — Input in degrees. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md).
**Returned value**

View File

@ -3213,6 +3213,74 @@ Result:
└───────────────────────────────────────┘
```
## connectionId
Retrieves the connection ID of the client that submitted the current query and returns it as a UInt64 integer.
**Syntax**
```sql
connectionId()
```
**Parameters**
None.
**Returned value**
Returns an integer of type UInt64.
**Implementation details**
This function is most useful in debugging scenarios or for internal purposes within the MySQL handler. It was created for compatibility with [MySQL's `CONNECTION_ID` function](https://dev.mysql.com/doc/refman/8.0/en/information-functions.html#function_connection-id). It is not typically used in production queries.
**Example**
Query:
```sql
SELECT connectionId();
```
```response
0
```
## connection_id
An alias of `connectionId`. Retrieves the connection ID of the client that submitted the current query and returns it as a UInt64 integer.
**Syntax**
```sql
connection_id()
```
**Parameters**
None.
**Returned value**
Returns an integer of type UInt64.
**Implementation details**
This function is most useful in debugging scenarios or for internal purposes within the MySQL handler. It was created for compatibility with [MySQL's `CONNECTION_ID` function](https://dev.mysql.com/doc/refman/8.0/en/information-functions.html#function_connection-id). It is not typically used in production queries.
**Example**
Query:
```sql
SELECT connection_id();
```
```response
0
```
## getClientHTTPHeader
Get the value of an HTTP header.

View File

@ -79,9 +79,9 @@ round(expression [, decimal_places])
The rounded number of the same type as the input number.
### Examples
**Examples**
**Example of use with Float**
Example of usage with Float:
``` sql
SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3;
@ -95,7 +95,7 @@ SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3;
└─────┴──────────────────────────┘
```
**Example of use with Decimal**
Example of usage with Decimal:
``` sql
SELECT cast(number / 2 AS Decimal(10,4)) AS x, round(x) FROM system.numbers LIMIT 3;
@ -124,9 +124,7 @@ SELECT cast(number / 2 AS Decimal(10,4)) AS x, round(x) FROM system.numbers LIM
└────────┴──────────────────────────────────────────────────┘
```
**Examples of rounding**
Rounding to the nearest number.
Examples of rounding to the nearest number:
``` text
round(3.2, 0) = 3
@ -183,9 +181,7 @@ roundBankers(expression [, decimal_places])
A value rounded by the bankers rounding method.
### Examples
**Example of use**
**Examples**
Query:
@ -210,7 +206,7 @@ Result:
└─────┴───┘
```
**Examples of Bankers rounding**
Examples of Bankers rounding:
``` text
roundBankers(0.4) = 0
@ -226,25 +222,180 @@ roundBankers(10.755, 2) = 10.76
- [round](#rounding_functions-round)
## roundToExp2(num)
## roundToExp2
Accepts a number. If the number is less than one, it returns 0. Otherwise, it rounds the number down to the nearest (whole non-negative) degree of two.
Accepts a number. If the number is less than one, it returns `0`. Otherwise, it rounds the number down to the nearest non-negative integer power of two.
## roundDuration(num)
**Syntax**
Accepts a number. If the number is less than one, it returns 0. Otherwise, it rounds the number down to numbers from the set: 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000.
```sql
roundToExp2(num)
```
## roundAge(num)
**Parameters**
Accepts a number. If the number is
- smaller than 1, it returns 0,
- between 1 and 17, it returns 17,
- between 18 and 24, it returns 18,
- between 25 and 34, it returns 25,
- between 35 and 44, it returns 35,
- between 45 and 54, it returns 45,
- larger than 55, it returns 55.
- `num`: A number to round. [UInt](../data-types/int-uint.md)/[Float](../data-types/float.md).
## roundDown(num, arr)
**Returned value**
- `0`, for `num` $\lt 1$. [UInt8](../data-types/int-uint.md).
- `num` rounded down to the nearest non-negative integer power of two. [UInt](../data-types/int-uint.md)/[Float](../data-types/float.md) equivalent to the input type.
**Example**
Query:
```sql
SELECT *, roundToExp2(*) FROM system.numbers WHERE number IN (0, 2, 5, 10, 19, 50)
```
Result:
```response
┌─number─┬─roundToExp2(number)─┐
│ 0 │ 0 │
│ 2 │ 2 │
│ 5 │ 4 │
│ 10 │ 8 │
│ 19 │ 16 │
│ 50 │ 32 │
└────────┴─────────────────────┘
```
## roundDuration
Accepts a number. If the number is less than one, it returns `0`. Otherwise, it rounds the number down to numbers from the set of commonly used durations: `1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000`.
**Syntax**
```sql
roundDuration(num)
```
**Parameters**
- `num`: A number to round to one of the numbers in the set of common durations. [UInt](../data-types/int-uint.md)/[Float](../data-types/float.md).
**Returned value**
- `0`, for `num` $\lt 1$.
- Otherwise, one of: `1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000`. [UInt16](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT *, roundDuration(*) FROM system.numbers WHERE number IN (0, 9, 19, 47, 101, 149, 205, 271, 421, 789, 1423, 2345, 4567, 9876, 24680, 42573)
```
Result:
```response
┌─number─┬─roundDuration(number)─┐
│ 0 │ 0 │
│ 9 │ 1 │
│ 19 │ 10 │
│ 47 │ 30 │
│ 101 │ 60 │
│ 149 │ 120 │
│ 205 │ 180 │
│ 271 │ 240 │
│ 421 │ 300 │
│ 789 │ 600 │
│ 1423 │ 1200 │
│ 2345 │ 1800 │
│ 4567 │ 3600 │
│ 9876 │ 7200 │
│ 24680 │ 18000 │
│ 42573 │ 36000 │
└────────┴───────────────────────┘
```
## roundAge
Accepts a number within various commonly used ranges of human age and returns either a maximum or a minimum within that range.
**Syntax**
```sql
roundAge(num)
```
**Parameters**
- `num`: A number representing an age in years. [UInt](../data-types/int-uint.md)/[Float](../data-types/float.md).
**Returned value**
- Returns `0`, for $age \lt 1$.
- Returns `17`, for $1 \leq age \leq 17$.
- Returns `18`, for $18 \leq age \leq 24$.
- Returns `25`, for $25 \leq age \leq 34$.
- Returns `35`, for $35 \leq age \leq 44$.
- Returns `45`, for $45 \leq age \leq 54$.
- Returns `55`, for $age \geq 55$.
Type: [UInt8](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT *, roundAge(*) FROM system.numbers WHERE number IN (0, 5, 20, 31, 37, 54, 72);
```
Result:
```response
┌─number─┬─roundAge(number)─┐
│ 0 │ 0 │
│ 5 │ 17 │
│ 20 │ 18 │
│ 31 │ 25 │
│ 37 │ 35 │
│ 54 │ 45 │
│ 72 │ 55 │
└────────┴──────────────────┘
```
## roundDown
Accepts a number and rounds it down to an element in the specified array. If the value is less than the lowest bound, the lowest bound is returned.
**Syntax**
```sql
roundDown(num, arr)
```
**Parameters**
- `num`: A number to round down. [Numeric](../data-types/int-uint.md).
- `arr`: Array of elements to round `num` down to. [Array](../data-types/array.md) of [UInt](../data-types/int-uint.md)/[Float](../data-types/float.md) type.
**Returned value**
- Number rounded down to an element in `arr`. If the value is less than the lowest bound, the lowest bound is returned. [UInt](../data-types/int-uint.md)/[Float](../data-types/float.md) type deduced from the type of `arr`.
**Example**
Query:
```sql
SELECT *, roundDown(*, [3, 4, 5]) FROM system.numbers WHERE number IN (0, 1, 2, 3, 4, 5)
```
Result:
```response
┌─number─┬─roundDown(number, [3, 4, 5])─┐
│ 0 │ 3 │
│ 1 │ 3 │
│ 2 │ 3 │
│ 3 │ 3 │
│ 4 │ 4 │
│ 5 │ 5 │
└────────┴──────────────────────────────┘
```

View File

@ -88,20 +88,93 @@ Result:
## length
Returns the length of a string in bytes (not: in characters or Unicode code points).
The function also works for arrays.
Returns the length of a string in bytes rather than in characters or Unicode code points. The function also works for arrays.
Alias: `OCTET_LENGTH`
**Syntax**
```sql
length(s)
```
**Parameters**
- `s`: An input string or array. [String](../data-types/string)/[Array](../data-types/array).
**Returned value**
- Length of the string or array `s` in bytes. [UInt64](../data-types/int-uint).
**Example**
Query:
```sql
SELECT length('Hello, world!');
```
Result:
```response
┌─length('Hello, world!')─┐
│ 13 │
└─────────────────────────┘
```
Query:
```sql
SELECT length([1, 2, 3, 4]);
```
Result:
```response
┌─length([1, 2, 3, 4])─┐
│ 4 │
└──────────────────────┘
```
## lengthUTF8
Returns the length of a string in Unicode code points (not: in bytes or characters). It assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Returns the length of a string in Unicode code points rather than in bytes or characters. It assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Alias:
Aliases:
- `CHAR_LENGTH`
- `CHARACTER_LENGTH`
**Syntax**
```sql
lengthUTF8(s)
```
**Parameters**
- `s`: String containing valid UTF-8 encoded text. [String](../data-types/string).
**Returned value**
- Length of the string `s` in Unicode code points. [UInt64](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT lengthUTF8('Здравствуй, мир!');
```
Result:
```response
┌─lengthUTF8('Здравствуй, мир!')─┐
│ 16 │
└────────────────────────────────┘
```
## left
Returns a substring of string `s` with a specified `offset` starting from the left.
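For example, taking the first five characters of an illustrative string:

```sql
SELECT left('ClickHouse', 5);  -- 'Click'
```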
@ -1055,6 +1128,34 @@ Result:
Like `base58Decode` but returns an empty string in case of error.
**Syntax**
```sql
tryBase58Decode(encoded)
```
**Parameters**
- `encoded`: [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, an empty string is returned.
**Returned value**
- A string containing the decoded value of the argument.
**Examples**
Query:
```sql
SELECT tryBase58Decode('3dc8KtHrwM') as res, tryBase58Decode('invalid') as res_invalid;
```
```response
┌─res─────┬─res_invalid─┐
│ Encoded │ │
└─────────┴─────────────┘
```
## base64Encode
Encodes a String or FixedString as base64.
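A quick sketch with an illustrative input string:

```sql
SELECT base64Encode('clickhouse');  -- 'Y2xpY2tob3VzZQ=='
```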
@ -1071,6 +1172,30 @@ Alias: `FROM_BASE64`.
Like `base64Decode` but returns an empty string in case of error.
**Syntax**
```sql
tryBase64Decode(encoded)
```
**Parameters**
- `encoded`: [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, an empty string is returned.
**Examples**
Query:
```sql
SELECT tryBase64Decode('RW5jb2RlZA==') as res, tryBase64Decode('invalid') as res_invalid;
```
```response
┌─res─────┬─res_invalid─┐
│ Encoded │ │
└─────────┴─────────────┘
```
## endsWith {#endswith}
Returns whether string `str` ends with `suffix`.
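For example, with illustrative strings:

```sql
SELECT endsWith('ClickHouse', 'House');  -- 1
SELECT endsWith('ClickHouse', 'Click');  -- 0
```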

View File

@ -817,6 +817,42 @@ Result:
└─────────────────────────────────────┘
```
## flattenTuple
Returns a flattened `output` tuple from a nested named `input` tuple. Elements of the `output` tuple are the paths from the original `input` tuple. For instance: `Tuple(a Int, Tuple(b Int, c Int)) -> Tuple(a Int, b Int, c Int)`. `flattenTuple` can be used to select all paths from type `Object` as separate columns.
**Syntax**
```sql
flattenTuple(input)
```
**Parameters**
- `input`: Nested named tuple to flatten. [Tuple](../data-types/tuple).
**Returned value**
- `output` tuple whose elements are paths from the original `input`. [Tuple](../data-types/tuple).
**Example**
Query:
``` sql
CREATE TABLE t_flatten_tuple(t Tuple(t1 Nested(a UInt32, s String), b UInt32, t2 Tuple(k String, v UInt32))) ENGINE = Memory;
INSERT INTO t_flatten_tuple VALUES (([(1, 'a'), (2, 'b')], 3, ('c', 4)));
SELECT flattenTuple(t) FROM t_flatten_tuple;
```
Result:
``` text
┌─flattenTuple(t)───────────┐
│ ([1,2],['a','b'],3,'c',4) │
└───────────────────────────┘
```
## Distance functions
All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md).

View File

@ -6,24 +6,29 @@ sidebar_label: Embedded Dictionaries
# Functions for Working with Embedded Dictionaries
:::note
In order for the functions below to work, the server config must specify the paths and addresses for getting all the embedded dictionaries. The dictionaries are loaded at the first call of any of these functions. If the reference lists can't be loaded, an exception is thrown.
For information about creating reference lists, see the section “Dictionaries”.
As such, the examples shown in this section will throw an exception in [ClickHouse Fiddle](https://fiddle.clickhouse.com/) and in quick release and production deployments by default, unless first configured.
:::
For information about creating reference lists, see the section [“Dictionaries”](../dictionaries#embedded-dictionaries).
## Multiple Geobases
ClickHouse supports working with multiple alternative geobases (regional hierarchies) simultaneously, in order to support various perspectives on which countries certain regions belong to.
The clickhouse-server config specifies the file with the regional hierarchy::`<path_to_regions_hierarchy_file>/opt/geo/regions_hierarchy.txt</path_to_regions_hierarchy_file>`
The clickhouse-server config specifies the file with the regional hierarchy:
Besides this file, it also searches for files nearby that have the _ symbol and any suffix appended to the name (before the file extension).
For example, it will also find the file `/opt/geo/regions_hierarchy_ua.txt`, if present.
```<path_to_regions_hierarchy_file>/opt/geo/regions_hierarchy.txt</path_to_regions_hierarchy_file>```
`ua` is called the dictionary key. For a dictionary without a suffix, the key is an empty string.
Besides this file, it also searches for files nearby that have the `_` symbol and any suffix appended to the name (before the file extension).
For example, it will also find the file `/opt/geo/regions_hierarchy_ua.txt`, if present. Here `ua` is called the dictionary key. For a dictionary without a suffix, the key is an empty string.
All the dictionaries are re-loaded in runtime (once every certain number of seconds, as defined in the builtin_dictionaries_reload_interval config parameter, or once an hour by default). However, the list of available dictionaries is defined one time, when the server starts.
All the dictionaries are re-loaded during runtime (once every certain number of seconds, as defined in the [`builtin_dictionaries_reload_interval`](../../operations/server-configuration-parameters/settings#builtin-dictionaries-reload-interval) config parameter, or once an hour by default). However, the list of available dictionaries is defined once, when the server starts.
All functions for working with regions have an optional argument at the end: the dictionary key. It is referred to as the geobase.
Example:
``` sql
@ -32,13 +37,116 @@ regionToCountry(RegionID, '') Uses the default dictionary: /opt/geo/regions_
regionToCountry(RegionID, 'ua') Uses the dictionary for the 'ua' key: /opt/geo/regions_hierarchy_ua.txt
```
### regionToCity(id\[, geobase\])
### regionToName
Accepts a UInt32 number, the region ID from the geobase. If this region is a city or part of a city, it returns the region ID for the appropriate city. Otherwise, returns 0.
Accepts a region ID and geobase and returns a string of the name of the region in the corresponding language. If the region with the specified ID does not exist, an empty string is returned.
### regionToArea(id\[, geobase\])
**Syntax**
Converts a region to an area (type 5 in the geobase). In every other way, this function is the same as regionToCity.
``` sql
regionToName(id[, lang])
```
**Parameters**
- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint).
- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional.
**Returned value**
- Name of the region in the corresponding language specified by `geobase`. [String](../data-types/string).
- Otherwise, an empty string.
**Example**
Query:
``` sql
SELECT regionToName(number::UInt32,'en') FROM numbers(0,5);
```
Result:
``` text
┌─regionToName(CAST(number, 'UInt32'), 'en')─┐
│ │
│ World │
│ USA │
│ Colorado │
│ Boulder County │
└────────────────────────────────────────────┘
```
### regionToCity
Accepts a region ID from the geobase. If this region is a city or part of a city, it returns the region ID for the appropriate city. Otherwise, returns 0.
**Syntax**
```sql
regionToCity(id [, geobase])
```
**Parameters**
- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint).
- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional.
**Returned value**
- Region ID for the appropriate city, if it exists. [UInt32](../data-types/int-uint).
- 0, if there is none.
**Example**
Query:
```sql
SELECT regionToName(number::UInt32, 'en'), regionToCity(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13);
```
Result:
```response
┌─regionToName(CAST(number, 'UInt32'), 'en')─┬─id─┬─regionToName(regionToCity(CAST(number, 'UInt32')), 'en')─┐
│ │ 0 │ │
│ World │ 0 │ │
│ USA │ 0 │ │
│ Colorado │ 0 │ │
│ Boulder County │ 0 │ │
│ Boulder │ 5 │ Boulder │
│ China │ 0 │ │
│ Sichuan │ 0 │ │
│ Chengdu │ 8 │ Chengdu │
│ America │ 0 │ │
│ North America │ 0 │ │
│ Eurasia │ 0 │ │
│ Asia │ 0 │ │
└────────────────────────────────────────────┴────┴──────────────────────────────────────────────────────────┘
```
### regionToArea
Converts a region to an area (type 5 in the geobase). In every other way, this function is the same as [regionToCity](#regiontocity).
**Syntax**
```sql
regionToArea(id [, geobase])
```
**Parameters**
- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint).
- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional.
**Returned value**
- Region ID for the appropriate area, if it exists. [UInt32](../data-types/int-uint).
- 0, if there is none.
**Example**
Query:
``` sql
SELECT DISTINCT regionToName(regionToArea(toUInt32(number), 'ua'))
@ -46,6 +154,8 @@ FROM system.numbers
LIMIT 15
```
Result:
``` text
┌─regionToName(regionToArea(toUInt32(number), \'ua\'))─┐
│ │
@ -66,16 +176,38 @@ LIMIT 15
└──────────────────────────────────────────────────────┘
```
### regionToDistrict(id\[, geobase\])
### regionToDistrict
Converts a region to a federal district (type 4 in the geobase). In every other way, this function is the same as regionToCity.
**Syntax**
```sql
regionToDistrict(id [, geobase])
```
**Parameters**
- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint).
- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional.
**Returned value**
- Region ID for the appropriate federal district, if it exists. [UInt32](../data-types/int-uint).
- 0, if there is none.
**Example**
Query:
``` sql
SELECT DISTINCT regionToName(regionToDistrict(toUInt32(number), 'ua'))
FROM system.numbers
LIMIT 15
```
Result:
``` text
┌─regionToName(regionToDistrict(toUInt32(number), \'ua\'))─┐
│ │
@ -96,17 +228,103 @@ LIMIT 15
└──────────────────────────────────────────────────────────┘
```
### regionToCountry(id\[, geobase\])
### regionToCountry
Converts a region to a country. In every other way, this function is the same as regionToCity.
Example: `regionToCountry(toUInt32(213)) = 225` converts Moscow (213) to Russia (225).
Converts a region to a country (type 3 in the geobase). In every other way, this function is the same as regionToCity.
### regionToContinent(id\[, geobase\])
**Syntax**
Converts a region to a continent. In every other way, this function is the same as regionToCity.
Example: `regionToContinent(toUInt32(213)) = 10001` converts Moscow (213) to Eurasia (10001).
```sql
regionToCountry(id [, geobase])
```
### regionToTopContinent(id\[, geobase\])
**Parameters**
- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint).
- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional.
**Returned value**
- Region ID for the appropriate country, if it exists. [UInt32](../data-types/int-uint).
- 0, if there is none.
**Example**
Query:
``` sql
SELECT regionToName(number::UInt32, 'en'), regionToCountry(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13);
```
Result:
``` text
┌─regionToName(CAST(number, 'UInt32'), 'en')─┬─id─┬─regionToName(regionToCountry(CAST(number, 'UInt32')), 'en')─┐
│ │ 0 │ │
│ World │ 0 │ │
│ USA │ 2 │ USA │
│ Colorado │ 2 │ USA │
│ Boulder County │ 2 │ USA │
│ Boulder │ 2 │ USA │
│ China │ 6 │ China │
│ Sichuan │ 6 │ China │
│ Chengdu │ 6 │ China │
│ America │ 0 │ │
│ North America │ 0 │ │
│ Eurasia │ 0 │ │
│ Asia │ 0 │ │
└────────────────────────────────────────────┴────┴─────────────────────────────────────────────────────────────┘
```
### regionToContinent
Converts a region to a continent (type 1 in the geobase). In every other way, this function is the same as regionToCity.
**Syntax**
```sql
regionToContinent(id [, geobase])
```
**Parameters**
- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint).
- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional.
**Returned value**
- Region ID for the appropriate continent, if it exists. [UInt32](../data-types/int-uint).
- 0, if there is none.
**Example**
Query:
``` sql
SELECT regionToName(number::UInt32, 'en'), regionToContinent(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13);
```
Result:
``` text
┌─regionToName(CAST(number, 'UInt32'), 'en')─┬─id─┬─regionToName(regionToContinent(CAST(number, 'UInt32')), 'en')─┐
│ │ 0 │ │
│ World │ 0 │ │
│ USA │ 10 │ North America │
│ Colorado │ 10 │ North America │
│ Boulder County │ 10 │ North America │
│ Boulder │ 10 │ North America │
│ China │ 12 │ Asia │
│ Sichuan │ 12 │ Asia │
│ Chengdu │ 12 │ Asia │
│ America │ 9 │ America │
│ North America │ 10 │ North America │
│ Eurasia │ 11 │ Eurasia │
│ Asia │ 12 │ Asia │
└────────────────────────────────────────────┴────┴───────────────────────────────────────────────────────────────┘
```
### regionToTopContinent
Finds the highest continent in the hierarchy for the region.
@ -116,37 +334,175 @@ Finds the highest continent in the hierarchy for the region.
regionToTopContinent(id[, geobase])
```
**Arguments**
**Parameters**
- `id` — Region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint.md).
- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../../sql-reference/data-types/string.md). Optional.
- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint).
- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional.
**Returned value**
- Identifier of the top level continent (the latter when you climb the hierarchy of regions).
- Identifier of the top-level continent (the last one when you climb the hierarchy of regions). [UInt32](../data-types/int-uint).
- 0, if there is none.
Type: `UInt32`.
**Example**
### regionToPopulation(id\[, geobase\])
Query:
Gets the population for a region.
The population can be recorded in files with the geobase. See the section “Dictionaries”.
If the population is not recorded for the region, it returns 0.
In the geobase, the population might be recorded for child regions, but not for parent regions.
``` sql
SELECT regionToName(number::UInt32, 'en'), regionToTopContinent(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13);
```
### regionIn(lhs, rhs\[, geobase\])
Result:
``` text
┌─regionToName(CAST(number, 'UInt32'), 'en')─┬─id─┬─regionToName(regionToTopContinent(CAST(number, 'UInt32')), 'en')─┐
│ │ 0 │ │
│ World │ 0 │ │
│ USA │ 9 │ America │
│ Colorado │ 9 │ America │
│ Boulder County │ 9 │ America │
│ Boulder │ 9 │ America │
│ China │ 11 │ Eurasia │
│ Sichuan │ 11 │ Eurasia │
│ Chengdu │ 11 │ Eurasia │
│ America │ 9 │ America │
│ North America │ 9 │ America │
│ Eurasia │ 11 │ Eurasia │
│ Asia │ 11 │ Eurasia │
└────────────────────────────────────────────┴────┴──────────────────────────────────────────────────────────────────┘
```
### regionToPopulation
Gets the population for a region. The population can be recorded in files with the geobase. See the section [“Dictionaries”](../dictionaries#embedded-dictionaries). If the population is not recorded for the region, it returns 0. In the geobase, the population might be recorded for child regions, but not for parent regions.
**Syntax**
``` sql
regionToPopulation(id[, geobase])
```
**Parameters**
- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint).
- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional.
**Returned value**
- Population for the region. [UInt32](../data-types/int-uint).
- 0, if there is none.
**Example**
Query:
``` sql
SELECT regionToName(number::UInt32, 'en'), regionToPopulation(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13);
```
Result:
``` text
┌─regionToName(CAST(number, 'UInt32'), 'en')─┬─population─┐
│ │ 0 │
│ World │ 4294967295 │
│ USA │ 330000000 │
│ Colorado │ 5700000 │
│ Boulder County │ 330000 │
│ Boulder │ 100000 │
│ China │ 1500000000 │
│ Sichuan │ 83000000 │
│ Chengdu │ 20000000 │
│ America │ 1000000000 │
│ North America │ 600000000 │
│ Eurasia │ 4294967295 │
│ Asia │ 4294967295 │
└────────────────────────────────────────────┴────────────┘
```
### regionIn
Checks whether a `lhs` region belongs to a `rhs` region. Returns a UInt8 number equal to 1 if it belongs, or 0 if it does not belong.
**Syntax**
``` sql
regionIn(lhs, rhs[, geobase])
```
**Parameters**
- `lhs` — Lhs region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint).
- `rhs` — Rhs region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint).
- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional.
**Returned value**
- 1, if it belongs. [UInt8](../../sql-reference/data-types/int-uint).
- 0, if it doesn't belong.
**Implementation details**
Checks whether a lhs region belongs to a rhs region. Returns a UInt8 number equal to 1 if it belongs, or 0 if it does not belong.
The relationship is reflexive: any region also belongs to itself.
### regionHierarchy(id\[, geobase\])
**Example**
Query:
``` sql
SELECT regionToName(n1.number::UInt32, 'en') || (regionIn(n1.number::UInt32, n2.number::UInt32) ? ' is in ' : ' is not in ') || regionToName(n2.number::UInt32, 'en') FROM numbers(1,2) AS n1 CROSS JOIN numbers(1,5) AS n2;
```
Result:
``` text
World is in World
World is not in USA
World is not in Colorado
World is not in Boulder County
World is not in Boulder
USA is in World
USA is in USA
USA is not in Colorado
USA is not in Boulder County
USA is not in Boulder
```
### regionHierarchy
Accepts a UInt32 number, the region ID from the geobase. Returns an array of region IDs consisting of the passed region and all parents along the chain.
Example: `regionHierarchy(toUInt32(213)) = [213,1,3,225,10001,10000]`.
### regionToName(id\[, lang\])
**Syntax**
Accepts a UInt32 number, the region ID from the geobase. A string with the name of the language can be passed as a second argument. Supported languages are: ru, en, ua, uk, by, kz, tr. If the second argument is omitted, the language ru is used. If the language is not supported, an exception is thrown. Returns a string, the name of the region in the corresponding language. If the region with the specified ID does not exist, an empty string is returned.
``` sql
regionHierarchy(id[, geobase])
```
`ua` and `uk` both mean Ukrainian.
**Parameters**
- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint).
- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional.
**Returned value**
- Array of region IDs consisting of the passed region and all parents along the chain. [Array](../data-types/array)([UInt32](../data-types/int-uint)).
**Example**
Query:
``` sql
SELECT regionHierarchy(number::UInt32) AS arr, arrayMap(id -> regionToName(id, 'en'), arr) FROM numbers(5);
```
Result:
``` text
┌─arr────────────┬─arrayMap(lambda(tuple(id), regionToName(id, 'en')), regionHierarchy(CAST(number, 'UInt32')))─┐
│ [] │ [] │
│ [1] │ ['World'] │
│ [2,10,9,1] │ ['USA','North America','America','World'] │
│ [3,2,10,9,1] │ ['Colorado','USA','North America','America','World'] │
│ [4,3,2,10,9,1] │ ['Boulder County','Colorado','USA','North America','America','World'] │
└────────────────┴──────────────────────────────────────────────────────────────────────────────────────────────┘
```

View File

@ -19,25 +19,51 @@ Subquery is another `SELECT` query that may be specified in parenthesis inside `
## FINAL Modifier
When `FINAL` is specified, ClickHouse fully merges the data before returning the result and thus performs all data transformations that happen during merges for the given table engine.
When `FINAL` is specified, ClickHouse fully merges the data before returning the result. This also performs all data transformations that happen during merges for the given table engine.
It is applicable when selecting data from ReplacingMergeTree, SummingMergeTree, AggregatingMergeTree, CollapsingMergeTree and VersionedCollapsingMergeTree tables.
It is applicable when selecting data from tables using the following table engines:
- `ReplacingMergeTree`
- `SummingMergeTree`
- `AggregatingMergeTree`
- `CollapsingMergeTree`
- `VersionedCollapsingMergeTree`
`SELECT` queries with `FINAL` are executed in parallel. The [max_final_threads](../../../operations/settings/settings.md#max-final-threads) setting limits the number of threads used.
There are drawbacks to using `FINAL` (see below).
### Drawbacks
Queries that use `FINAL` are executed slightly slower than similar queries that do not, because:
Queries that use `FINAL` execute slightly slower than similar queries that do not use `FINAL` because:
- Data is merged during query execution.
- Queries with `FINAL` read primary key columns in addition to the columns specified in the query.
- Queries with `FINAL` may read primary key columns in addition to the columns specified in the query.
`FINAL` requires additional compute and memory resources, as the processing that normally would occur at merge time must occur in memory at the time of the query. However, using FINAL is sometimes necessary in order to produce accurate results, and is less expensive than running `OPTIMIZE` to force a merge. It is also sometimes possible to use different queries that assume the background processes of the `MergeTree` engine haven't happened yet and deal with it by applying aggregation (for example, to discard duplicates). If you need to use FINAL in your queries in order to get the required results, then it is okay to do so but be aware of the additional processing required.
`FINAL` requires additional compute and memory resources because the processing that normally would occur at merge time must occur in memory at the time of the query. However, using `FINAL` is sometimes necessary in order to produce accurate results (as data may not yet be fully merged). It is less expensive than running `OPTIMIZE` to force a merge.
As an alternative to using `FINAL`, it is sometimes possible to use different queries that assume the background processes of the `MergeTree` engine have not yet occurred and deal with it by applying an aggregation (for example, to discard duplicates). If you need to use `FINAL` in your queries in order to get the required results, it is okay to do so but be aware of the additional processing required.
`FINAL` can be applied automatically to all tables in a query by enabling the [FINAL](../../../operations/settings/settings.md#final) setting, either for a session or in a user profile.
### Example Usage
**Using the `FINAL` keyword**
```sql
SELECT x, y FROM mytable FINAL WHERE x > 1;
```
**Using `FINAL` as a query-level setting**
```sql
SELECT x, y FROM mytable WHERE x > 1 SETTINGS final = 1;
```
**Using `FINAL` as a session-level setting**
```sql
SET final = 1;
SELECT x, y FROM mytable WHERE x > 1;
```
## Implementation Details
If the `FROM` clause is omitted, data will be read from the `system.one` table.
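For example, the two queries below are equivalent; the first implicitly reads from the single-row `system.one` table:

```sql
SELECT 1;                  -- no FROM clause: reads from system.one
SELECT 1 FROM system.one;  -- explicit form, same result
```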

View File

@ -532,3 +532,15 @@ If there's a refresh in progress for the given view, interrupt and cancel it. Ot
```sql
SYSTEM CANCEL VIEW [db.]name
```
### SYSTEM UNLOAD PRIMARY KEY
Unload the primary keys for the given table or for all tables.
```sql
SYSTEM UNLOAD PRIMARY KEY [db.]name
```
```sql
SYSTEM UNLOAD PRIMARY KEY
```

View File

@ -150,7 +150,7 @@ sidebar_label: "Визуальные интерфейсы от сторонни
### ClickVisual {#clickvisual}
[ClickVisual](https://clickvisual.gocn.vip/) ClickVisual is an easily accessible platform for log querying, analysis, and alerting. The documentation is in Chinese.
[ClickVisual](https://clickvisual.net/) ClickVisual is an easily accessible platform for log querying, analysis, and alerting. The documentation is in Chinese.
Main features:

View File

@ -3447,17 +3447,6 @@ SELECT
FROM fuse_tbl
```
## allow_experimental_database_replicated {#allow_experimental_database_replicated}
Allows creating databases with the [Replicated](../../engines/database-engines/replicated.md) engine.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: `0`.
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
Sets how long, in seconds, the initial DDL query should wait for the replicated database to process previous entries of the DDL queue.

View File

@ -29,6 +29,16 @@ slug: /ru/operations/system-tables/text_log
- `source_file` (LowCardinality(String)) — the source file the log entry was made from.
- `source_line` (UInt64) — the source line the log entry was made from.
- `message_format_string` (LowCardinality(String)) — the format string that was used to format the message.
- `value1` (String) - argument 1 that was used to format the message.
- `value2` (String) - argument 2 that was used to format the message.
- `value3` (String) - argument 3 that was used to format the message.
- `value4` (String) - argument 4 that was used to format the message.
- `value5` (String) - argument 5 that was used to format the message.
- `value6` (String) - argument 6 that was used to format the message.
- `value7` (String) - argument 7 that was used to format the message.
- `value8` (String) - argument 8 that was used to format the message.
- `value9` (String) - argument 9 that was used to format the message.
- `value10` (String) - argument 10 that was used to format the message.
**Example**
@ -53,4 +63,14 @@ revision: 54440
source_file: /ClickHouse/src/Interpreters/DNSCacheUpdater.cpp; void DB::DNSCacheUpdater::start()
source_line: 45
message_format_string: Update period {} seconds
value1: 15
value2:
value3:
value4:
value5:
value6:
value7:
value8:
value9:
value10:
```

View File

@ -5,7 +5,7 @@ sidebar_position: 106
# argMax {#agg-function-argmax}
Calculates the `arg` value for the maximum `val` value.
Calculates the `arg` value for the maximum `val` value. If several rows share the same maximum `val`, which of the corresponding `arg` values is returned is not deterministic. Both parts, arg and max, behave as aggregate functions: they both skip NULL during processing and return non-NULL values when non-NULL values are available.
**Syntax**
@ -49,3 +49,60 @@ SELECT argMax(user, salary), argMax(tuple(user, salary), salary) FROM salary;
│ director │ ('director',5000) │
└──────────────────────┴─────────────────────────────────────┘
```
**Additional example**
```sql
CREATE TABLE test
(
a Nullable(String),
b Nullable(Int64)
)
ENGINE = Memory AS
SELECT *
FROM VALUES(('a', 1), ('b', 2), ('c', 2), (NULL, 3), (NULL, NULL), ('d', NULL));
select * from test;
┌─a────┬────b─┐
│ a │ 1 │
│ b │ 2 │
│ c │ 2 │
│ ᴺᵁᴸᴸ │ 3 │
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ d │ ᴺᵁᴸᴸ │
└──────┴──────┘
SELECT argMax(a, b), max(b) FROM test;
┌─argMax(a, b)─┬─max(b)─┐
│ b │ 3 │ -- argMax = 'b' because it is the first non-NULL value, max(b) is from another row!
└──────────────┴────────┘
SELECT argMax(tuple(a), b) FROM test;
┌─argMax(tuple(a), b)─┐
│ (NULL) │ -- A `Tuple` that contains only `NULL` values is not a `NULL` tuple, so the aggregate functions will not skip that row with `NULL` values.
└─────────────────────┘
SELECT (argMax((a, b), b) as t).1 argMaxA, t.2 argMaxB FROM test;
┌─argMaxA─┬─argMaxB─┐
│ ᴺᵁᴸᴸ │ 3 │ -- You can use a `Tuple` and get both values for the corresponding max(b).
└─────────┴─────────┘
SELECT argMax(a, b), max(b) FROM test WHERE a IS NULL AND b IS NULL;
┌─argMax(a, b)─┬─max(b)─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ -- All aggregated rows contain at least one `NULL` value, so all rows are skipped and the result is `NULL`.
└──────────────┴────────┘
SELECT argMax(a, (b,a)) FROM test;
┌─argMax(a, tuple(b, a))─┐
│ c │ -- There are two rows with b=2; using a `Tuple` in `Max` makes it possible to get an `arg` other than the first one.
└────────────────────────┘
SELECT argMax(a, tuple(b)) FROM test;
┌─argMax(a, tuple(b))─┐
│ b │ -- A `Tuple` can be used in `Max` so as not to skip `NULL` values in `Max`.
└─────────────────────┘
```
**See also**
- [Tuple](/docs/ru/sql-reference/data-types/tuple.md)

View File

@ -93,7 +93,7 @@ ClickHouse Web 界面 [Tabix](https://github.com/tabixio/tabix).
### ClickVisual {#clickvisual}
[ClickVisual](https://clickvisual.gocn.vip/) ClickVisual is a lightweight, open-source visualization platform for log querying, analysis, and alerting.
[ClickVisual](https://clickvisual.net/) ClickVisual is a lightweight, open-source visualization platform for log querying, analysis, and alerting.
Features:

View File

@ -918,11 +918,13 @@ bool Client::processWithFuzzing(const String & full_query)
}
void Client::printHelpMessage(const OptionsDescription & options_description)
void Client::printHelpMessage(const OptionsDescription & options_description, bool verbose)
{
std::cout << options_description.main_description.value() << "\n";
std::cout << options_description.external_description.value() << "\n";
std::cout << options_description.hosts_and_ports_description.value() << "\n";
if (verbose)
std::cout << "All settings are documented at https://clickhouse.com/docs/en/operations/settings/settings.\n\n";
std::cout << "In addition, --param_name=value can be specified for substitution of parameters for parametrized queries.\n";
std::cout << "\nSee also: https://clickhouse.com/docs/en/integrations/sql-clients/cli\n";
}

View File

@ -25,7 +25,7 @@ protected:
String getName() const override { return "client"; }
void printHelpMessage(const OptionsDescription & options_description) override;
void printHelpMessage(const OptionsDescription & options_description, bool verbose) override;
void addOptions(OptionsDescription & options_description) override;

View File

@ -729,6 +729,15 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
}
}
/// Don't allow relative paths because install script may cd to / when installing
/// And having path=./ may break the system
if (log_path.is_relative())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Log path is relative: {}", log_path.string());
if (data_path.is_relative())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Data path is relative: {}", data_path.string());
if (pid_path.is_relative())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Pid path is relative: {}", pid_path.string());
/// Create directories for data and log.
if (fs::exists(log_path))

View File

@ -121,9 +121,12 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskType.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/IObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataOperationsHolder.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataFromDiskTransactionState.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorage.cpp
@ -137,6 +140,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/DiskS3Utils.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageFactory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFactory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp

View File

@ -481,7 +481,6 @@ try
registerFormats();
processConfig();
adjustSettings();
initTTYBuffer(toProgressOption(config().getString("progress", "default")));
ASTAlterCommand::setFormatAlterCommandsWithParentheses(true);
@ -689,6 +688,7 @@ void LocalServer::processConfig()
/// NOTE: it is important to apply any overrides before
/// setDefaultProfiles() calls since it will copy current context (i.e.
/// there is separate context for Buffer tables).
adjustSettings();
applySettingsOverridesForLocal(global_context);
applyCmdOptions(global_context);
@ -774,10 +774,12 @@ void LocalServer::processConfig()
}
void LocalServer::printHelpMessage([[maybe_unused]] const OptionsDescription & options_description)
void LocalServer::printHelpMessage(const OptionsDescription & options_description, bool verbose)
{
std::cout << getHelpHeader() << "\n";
std::cout << options_description.main_description.value() << "\n";
if (verbose)
std::cout << "All settings are documented at https://clickhouse.com/docs/en/operations/settings/settings.\n\n";
std::cout << getHelpFooter() << "\n";
std::cout << "In addition, --param_name=value can be specified for substitution of parameters for parametrized queries.\n";
std::cout << "\nSee also: https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/\n";

View File

@ -36,7 +36,7 @@ protected:
String getName() const override { return "local"; }
void printHelpMessage(const OptionsDescription & options_description) override;
void printHelpMessage(const OptionsDescription & options_description, bool verbose) override;
void addOptions(OptionsDescription & options_description) override;

View File

@ -280,11 +280,11 @@ void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration
/// Optional improvements in access control system.
/// The default values are false because we need to be compatible with earlier access configurations
setEnabledUsersWithoutRowPoliciesCanReadRows(config_.getBool("access_control_improvements.users_without_row_policies_can_read_rows", false));
setOnClusterQueriesRequireClusterGrant(config_.getBool("access_control_improvements.on_cluster_queries_require_cluster_grant", false));
setSelectFromSystemDatabaseRequiresGrant(config_.getBool("access_control_improvements.select_from_system_db_requires_grant", false));
setSelectFromInformationSchemaRequiresGrant(config_.getBool("access_control_improvements.select_from_information_schema_requires_grant", false));
setSettingsConstraintsReplacePrevious(config_.getBool("access_control_improvements.settings_constraints_replace_previous", false));
setEnabledUsersWithoutRowPoliciesCanReadRows(config_.getBool("access_control_improvements.users_without_row_policies_can_read_rows", true));
setOnClusterQueriesRequireClusterGrant(config_.getBool("access_control_improvements.on_cluster_queries_require_cluster_grant", true));
setSelectFromSystemDatabaseRequiresGrant(config_.getBool("access_control_improvements.select_from_system_db_requires_grant", true));
setSelectFromInformationSchemaRequiresGrant(config_.getBool("access_control_improvements.select_from_information_schema_requires_grant", true));
setSettingsConstraintsReplacePrevious(config_.getBool("access_control_improvements.settings_constraints_replace_previous", true));
setTableEnginesRequireGrant(config_.getBool("access_control_improvements.table_engines_require_grant", false));
addStoragesFromMainConfig(config_, config_path_, get_zookeeper_function_);
@ -607,7 +607,8 @@ AuthResult AccessControl::authenticate(const Credentials & credentials, const Po
/// We use the same message for all authentication failures because we don't want to give away any unnecessary information for security reasons,
/// only the log will show the exact reason.
throw Exception(PreformattedMessage{message.str(),
"{}: Authentication failed: password is incorrect, or there is no user with such name.{}"},
"{}: Authentication failed: password is incorrect, or there is no user with such name.{}",
std::vector<std::string>{credentials.getUserName()}},
ErrorCodes::AUTHENTICATION_FAILED);
}
}

View File

@ -210,6 +210,7 @@ enum class AccessType
M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT, SYSTEM WAIT FAILPOINT", GLOBAL, SYSTEM) \
M(SYSTEM_LISTEN, "SYSTEM START LISTEN, SYSTEM STOP LISTEN", GLOBAL, SYSTEM) \
M(SYSTEM_JEMALLOC, "SYSTEM JEMALLOC PURGE, SYSTEM JEMALLOC ENABLE PROFILE, SYSTEM JEMALLOC DISABLE PROFILE, SYSTEM JEMALLOC FLUSH PROFILE", GLOBAL, SYSTEM) \
M(SYSTEM_UNLOAD_PRIMARY_KEY, "SYSTEM UNLOAD PRIMARY KEY", TABLE, SYSTEM) \
M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \
\
M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\

View File

@ -53,8 +53,9 @@ TEST(AccessRights, Union)
"SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
"SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM VIRTUAL PARTS UPDATE, "
"SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "
"SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, "
"GRANT TABLE ENGINE ON db1, GRANT SET DEFINER ON db1, GRANT NAMED COLLECTION ADMIN ON db1");
"SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, "
"SYSTEM UNLOAD PRIMARY KEY, dictGet ON db1.*, GRANT TABLE ENGINE ON db1, "
"GRANT SET DEFINER ON db1, GRANT NAMED COLLECTION ADMIN ON db1");
}

View File

@ -56,7 +56,9 @@ void IdentifierNode::updateTreeHashImpl(HashState & state, CompareOptions) const
QueryTreeNodePtr IdentifierNode::cloneImpl() const
{
return std::make_shared<IdentifierNode>(identifier);
auto clone_identifier_node = std::make_shared<IdentifierNode>(identifier);
clone_identifier_node->table_expression_modifiers = table_expression_modifiers;
return clone_identifier_node;
}
ASTPtr IdentifierNode::toASTImpl(const ConvertToASTOptions & /* options */) const

View File

@ -1,9 +1,10 @@
#include <Analyzer/Passes/QueryAnalysisPass.h>
#include <boost/algorithm/string.hpp>
#include <Common/checkStackSize.h>
#include <Common/NamePrompter.h>
#include <Common/ProfileEvents.h>
#include <Analyzer/FunctionSecretArgumentsFinderTreeNode.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
@ -81,8 +82,8 @@
#include <Analyzer/QueryTreeBuilder.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/Identifier.h>
#include <boost/algorithm/string.hpp>
#include <Analyzer/FunctionSecretArgumentsFinderTreeNode.h>
#include <Analyzer/RecursiveCTE.h>
namespace ProfileEvents
{
@ -740,7 +741,7 @@ struct IdentifierResolveScope
/// Identifier lookup to result
std::unordered_map<IdentifierLookup, IdentifierResolveState, IdentifierLookupHash> identifier_lookup_to_resolve_state;
/// Lambda argument can be expression like constant, column, or it can be function
/// Argument can be expression like constant, column, function or table expression
std::unordered_map<std::string, QueryTreeNodePtr> expression_argument_name_to_node;
/// Alias name to query expression node
@ -1464,7 +1465,8 @@ private:
/// Lambdas that are currently in resolve process
std::unordered_set<IQueryTreeNode *> lambdas_in_resolve_process;
std::unordered_set<std::string_view> cte_in_resolve_process;
/// CTEs that are currently in resolve process
std::unordered_set<std::string_view> ctes_in_resolve_process;
/// Function name to user defined lambda map
std::unordered_map<std::string, QueryTreeNodePtr> function_name_to_user_defined_lambda;
@ -2148,9 +2150,9 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden
else
{
/** Make unique column names for tuple.
*
* Example: SELECT (SELECT 2 AS x, x)
*/
*
* Example: SELECT (SELECT 2 AS x, x)
*/
makeUniqueColumnNamesInBlock(block);
scalar_block.insert({
@ -3981,6 +3983,9 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const
auto * union_node = resolved_identifier->as<UnionNode>();
bool is_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE());
bool is_table_from_expression_arguments = lookup_result.resolve_place == IdentifierResolvePlace::EXPRESSION_ARGUMENTS &&
resolved_identifier->getNodeType() == QueryTreeNodeType::TABLE;
bool is_valid_table_expression = is_cte || is_table_from_expression_arguments;
/** From parent scopes we can resolve table identifiers only as CTE.
* Example: SELECT (SELECT 1 FROM a) FROM test_table AS a;
@ -3988,14 +3993,10 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const
* During child scope table identifier resolve a, table node test_table with alias a from parent scope
* is invalid.
*/
if (identifier_lookup.isTableExpressionLookup() && !is_cte)
if (identifier_lookup.isTableExpressionLookup() && !is_valid_table_expression)
continue;
if (is_cte)
{
return lookup_result;
}
else if (resolved_identifier->as<ConstantNode>())
if (is_valid_table_expression || resolved_identifier->as<ConstantNode>())
{
return lookup_result;
}
@ -4071,13 +4072,9 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook
if (it->second.resolve_result.isResolved() &&
scope.use_identifier_lookup_to_result_cache &&
!scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup))
{
if (!it->second.resolve_result.isResolvedFromCTEs() || !cte_in_resolve_process.contains(identifier_lookup.identifier.getFullName()))
{
return it->second.resolve_result;
}
}
!scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup) &&
(!it->second.resolve_result.isResolvedFromCTEs() || !ctes_in_resolve_process.contains(identifier_lookup.identifier.getFullName())))
return it->second.resolve_result;
}
else
{
@ -4150,7 +4147,7 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook
/// To accomplish this behaviour it's not allowed to resolve identifiers to
/// CTE that is being resolved.
if (cte_query_node_it != scope.cte_name_to_query_node.end()
&& !cte_in_resolve_process.contains(full_name))
&& !ctes_in_resolve_process.contains(full_name))
{
resolve_result.resolved_identifier = cte_query_node_it->second;
resolve_result.resolve_place = IdentifierResolvePlace::CTE;
@ -6296,14 +6293,14 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
///
/// In this example the argument of function `in` is being resolved here. If CTE `test1` is not forbidden,
/// `test1` is resolved to CTE (not to the table) in `initializeQueryJoinTreeNode` function.
cte_in_resolve_process.insert(cte_name);
ctes_in_resolve_process.insert(cte_name);
if (subquery_node)
resolveQuery(resolved_identifier_node, subquery_scope);
else
resolveUnion(resolved_identifier_node, subquery_scope);
cte_in_resolve_process.erase(cte_name);
ctes_in_resolve_process.erase(cte_name);
}
}
}
@ -7874,7 +7871,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
auto & query_node_typed = query_node->as<QueryNode &>();
if (query_node_typed.isCTE())
cte_in_resolve_process.insert(query_node_typed.getCTEName());
ctes_in_resolve_process.insert(query_node_typed.getCTEName());
bool is_rollup_or_cube = query_node_typed.isGroupByWithRollup() || query_node_typed.isGroupByWithCube();
@ -7956,7 +7953,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
auto * union_node = node->as<UnionNode>();
bool subquery_is_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE());
if (!subquery_is_cte)
continue;
@ -8213,7 +8209,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
query_node_typed.resolveProjectionColumns(std::move(projection_columns));
if (query_node_typed.isCTE())
cte_in_resolve_process.erase(query_node_typed.getCTEName());
ctes_in_resolve_process.erase(query_node_typed.getCTEName());
}
void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, IdentifierResolveScope & scope)
@ -8221,13 +8217,56 @@ void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, Identifier
auto & union_node_typed = union_node->as<UnionNode &>();
if (union_node_typed.isCTE())
cte_in_resolve_process.insert(union_node_typed.getCTEName());
ctes_in_resolve_process.insert(union_node_typed.getCTEName());
auto & queries_nodes = union_node_typed.getQueries().getNodes();
for (auto & query_node : queries_nodes)
std::optional<RecursiveCTETable> recursive_cte_table;
TableNodePtr recursive_cte_table_node;
if (union_node_typed.isCTE() && union_node_typed.isRecursiveCTE())
{
auto & non_recursive_query = queries_nodes[0];
bool non_recursive_query_is_query_node = non_recursive_query->getNodeType() == QueryTreeNodeType::QUERY;
auto & non_recursive_query_mutable_context = non_recursive_query_is_query_node ? non_recursive_query->as<QueryNode &>().getMutableContext()
: non_recursive_query->as<UnionNode &>().getMutableContext();
IdentifierResolveScope non_recursive_subquery_scope(non_recursive_query, &scope /*parent_scope*/);
non_recursive_subquery_scope.subquery_depth = scope.subquery_depth + 1;
if (non_recursive_query_is_query_node)
resolveQuery(non_recursive_query, non_recursive_subquery_scope);
else
resolveUnion(non_recursive_query, non_recursive_subquery_scope);
auto temporary_table_columns = non_recursive_query_is_query_node
? non_recursive_query->as<QueryNode &>().getProjectionColumns()
: non_recursive_query->as<UnionNode &>().computeProjectionColumns();
auto temporary_table_holder = std::make_shared<TemporaryTableHolder>(
non_recursive_query_mutable_context,
ColumnsDescription{NamesAndTypesList{temporary_table_columns.begin(), temporary_table_columns.end()}},
ConstraintsDescription{},
nullptr /*query*/,
true /*create_for_global_subquery*/);
auto temporary_table_storage = temporary_table_holder->getTable();
recursive_cte_table_node = std::make_shared<TableNode>(temporary_table_storage, non_recursive_query_mutable_context);
recursive_cte_table_node->setTemporaryTableName(union_node_typed.getCTEName());
recursive_cte_table.emplace(std::move(temporary_table_holder), std::move(temporary_table_storage), std::move(temporary_table_columns));
}
size_t queries_nodes_size = queries_nodes.size();
for (size_t i = recursive_cte_table.has_value(); i < queries_nodes_size; ++i)
{
auto & query_node = queries_nodes[i];
IdentifierResolveScope subquery_scope(query_node, &scope /*parent_scope*/);
if (recursive_cte_table_node)
subquery_scope.expression_argument_name_to_node[union_node_typed.getCTEName()] = recursive_cte_table_node;
auto query_node_type = query_node->getNodeType();
if (query_node_type == QueryTreeNodeType::QUERY)
@ -8247,8 +8286,19 @@ void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, Identifier
}
}
if (recursive_cte_table && isStorageUsedInTree(recursive_cte_table->storage, union_node.get()))
{
if (union_node_typed.getUnionMode() != SelectUnionMode::UNION_ALL)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Recursive CTE subquery {} with {} union mode is unsupported, only UNION ALL union mode is supported",
union_node_typed.formatASTForErrorMessage(),
toString(union_node_typed.getUnionMode()));
union_node_typed.setRecursiveCTETable(std::move(*recursive_cte_table));
}
if (union_node_typed.isCTE())
cte_in_resolve_process.erase(union_node_typed.getCTEName());
ctes_in_resolve_process.erase(union_node_typed.getCTEName());
}
}

View File

@ -10,9 +10,10 @@
#include <Interpreters/Context.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -51,13 +52,25 @@ public:
{
const auto & second_const_value = second_const_node->getValue();
if (second_const_value.isNull()
|| (lower_name == "sum" && isInt64OrUInt64FieldType(second_const_value.getType()) && second_const_value.get<UInt64>() == 0))
|| (lower_name == "sum" && isInt64OrUInt64FieldType(second_const_value.getType()) && second_const_value.get<UInt64>() == 0
&& !if_node->getResultType()->isNullable()))
{
/// avg(if(cond, a, null)) -> avgIf(a, cond)
/// avg(if(cond, a, null)) -> avgIf(a::ResultTypeIf, cond)
/// avg(if(cond, nullable_a, null)) -> avgIf(nullable_a, cond)
/// sum(if(cond, a, 0)) -> sumIf(a, cond)
function_arguments_nodes.resize(2);
function_arguments_nodes[0] = std::move(if_arguments_nodes[1]);
function_arguments_nodes[1] = std::move(if_arguments_nodes[0]);
/// sum(if(cond, nullable_a, 0)) **is not** equivalent to sumIfOrNull(cond, nullable_a) as
/// it changes the output when no rows pass the condition (from 0 to NULL)
QueryTreeNodes new_arguments{2};
/// We need to preserve the output type from if()
if (if_arguments_nodes[1]->getResultType()->getName() != if_node->getResultType()->getName())
new_arguments[0] = createCastFunction(std::move(if_arguments_nodes[1]), if_node->getResultType(), getContext());
else
new_arguments[0] = std::move(if_arguments_nodes[1]);
new_arguments[1] = std::move(if_arguments_nodes[0]);
function_arguments_nodes = std::move(new_arguments);
resolveAsAggregateFunctionWithIf(
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
}
@ -66,19 +79,28 @@ public:
{
const auto & first_const_value = first_const_node->getValue();
if (first_const_value.isNull()
|| (lower_name == "sum" && isInt64OrUInt64FieldType(first_const_value.getType()) && first_const_value.get<UInt64>() == 0))
|| (lower_name == "sum" && isInt64OrUInt64FieldType(first_const_value.getType()) && first_const_value.get<UInt64>() == 0
&& !if_node->getResultType()->isNullable()))
{
/// avg(if(cond, null, a) -> avgIf(a, !cond))
/// avg(if(cond, null, a) -> avgIf(a::ResultTypeIf, !cond))
/// sum(if(cond, 0, a) -> sumIf(a, !cond))
/// sum(if(cond, 0, nullable_a) **is not** sumIf(a, !cond)) -> Same as above
QueryTreeNodes new_arguments{2};
if (if_arguments_nodes[2]->getResultType()->getName() != if_node->getResultType()->getName())
new_arguments[0] = createCastFunction(std::move(if_arguments_nodes[2]), if_node->getResultType(), getContext());
else
new_arguments[0] = std::move(if_arguments_nodes[2]);
auto not_function = std::make_shared<FunctionNode>("not");
auto & not_function_arguments = not_function->getArguments().getNodes();
not_function_arguments.push_back(std::move(if_arguments_nodes[0]));
not_function->resolveAsFunction(
FunctionFactory::instance().get("not", getContext())->build(not_function->getArgumentColumns()));
new_arguments[1] = std::move(not_function);
function_arguments_nodes.resize(2);
function_arguments_nodes[0] = std::move(if_arguments_nodes[2]);
function_arguments_nodes[1] = std::move(not_function);
function_arguments_nodes = std::move(new_arguments);
resolveAsAggregateFunctionWithIf(
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
}
@ -90,13 +112,9 @@ private:
{
auto result_type = function_node.getResultType();
std::string suffix = "If";
if (result_type->isNullable())
suffix = "OrNullIf";
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
function_node.getFunctionName() + suffix,
function_node.getFunctionName() + "If",
function_node.getNullsAction(),
argument_types,
function_node.getAggregateFunction()->getParameters(),

View File

@ -14,12 +14,14 @@
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTWithElement.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Analyzer/Utils.h>
#include <Analyzer/UnionNode.h>
namespace DB
{
@ -107,6 +109,9 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
if (is_cte)
buffer << ", is_cte: " << is_cte;
if (is_recursive_with)
buffer << ", is_recursive_with: " << is_recursive_with;
if (is_distinct)
buffer << ", is_distinct: " << is_distinct;
@ -259,6 +264,7 @@ bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const
return is_subquery == rhs_typed.is_subquery &&
is_cte == rhs_typed.is_cte &&
is_recursive_with == rhs_typed.is_recursive_with &&
is_distinct == rhs_typed.is_distinct &&
is_limit_with_ties == rhs_typed.is_limit_with_ties &&
is_group_by_with_totals == rhs_typed.is_group_by_with_totals &&
@ -291,6 +297,7 @@ void QueryNode::updateTreeHashImpl(HashState & state, CompareOptions) const
state.update(projection_column_type_name);
}
state.update(is_recursive_with);
state.update(is_distinct);
state.update(is_limit_with_ties);
state.update(is_group_by_with_totals);
@ -317,19 +324,20 @@ QueryTreeNodePtr QueryNode::cloneImpl() const
{
auto result_query_node = std::make_shared<QueryNode>(context);
result_query_node->is_subquery = is_subquery;
result_query_node->is_cte = is_cte;
result_query_node->is_distinct = is_distinct;
result_query_node->is_limit_with_ties = is_limit_with_ties;
result_query_node->is_group_by_with_totals = is_group_by_with_totals;
result_query_node->is_group_by_with_rollup = is_group_by_with_rollup;
result_query_node->is_group_by_with_cube = is_group_by_with_cube;
result_query_node->is_subquery = is_subquery;
result_query_node->is_cte = is_cte;
result_query_node->is_recursive_with = is_recursive_with;
result_query_node->is_distinct = is_distinct;
result_query_node->is_limit_with_ties = is_limit_with_ties;
result_query_node->is_group_by_with_totals = is_group_by_with_totals;
result_query_node->is_group_by_with_rollup = is_group_by_with_rollup;
result_query_node->is_group_by_with_cube = is_group_by_with_cube;
result_query_node->is_group_by_with_grouping_sets = is_group_by_with_grouping_sets;
result_query_node->is_group_by_all = is_group_by_all;
result_query_node->is_order_by_all = is_order_by_all;
result_query_node->cte_name = cte_name;
result_query_node->projection_columns = projection_columns;
result_query_node->settings_changes = settings_changes;
result_query_node->is_group_by_all = is_group_by_all;
result_query_node->is_order_by_all = is_order_by_all;
result_query_node->cte_name = cte_name;
result_query_node->projection_columns = projection_columns;
result_query_node->settings_changes = settings_changes;
return result_query_node;
}
@ -337,6 +345,7 @@ QueryTreeNodePtr QueryNode::cloneImpl() const
ASTPtr QueryNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto select_query = std::make_shared<ASTSelectQuery>();
select_query->recursive_with = is_recursive_with;
select_query->distinct = is_distinct;
select_query->limit_with_ties = is_limit_with_ties;
select_query->group_by_with_totals = is_group_by_with_totals;
@ -347,7 +356,41 @@ ASTPtr QueryNode::toASTImpl(const ConvertToASTOptions & options) const
select_query->order_by_all = is_order_by_all;
if (hasWith())
select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST(options));
{
const auto & with = getWith();
auto expression_list_ast = std::make_shared<ASTExpressionList>();
expression_list_ast->children.reserve(with.getNodes().size());
for (const auto & with_node : with)
{
auto with_node_ast = with_node->toAST(options);
expression_list_ast->children.push_back(with_node_ast);
const auto * with_query_node = with_node->as<QueryNode>();
const auto * with_union_node = with_node->as<UnionNode>();
if (!with_query_node && !with_union_node)
continue;
bool is_with_node_cte = with_query_node ? with_query_node->isCTE() : with_union_node->isCTE();
if (!is_with_node_cte)
continue;
const auto & with_node_cte_name = with_query_node ? with_query_node->cte_name : with_union_node->getCTEName();
auto * with_node_ast_subquery = with_node_ast->as<ASTSubquery>();
if (with_node_ast_subquery)
with_node_ast_subquery->cte_name = "";
auto with_element_ast = std::make_shared<ASTWithElement>();
with_element_ast->name = with_node_cte_name;
with_element_ast->subquery = std::move(with_node_ast);
with_element_ast->children.push_back(with_element_ast->subquery);
expression_list_ast->children.back() = std::move(with_element_ast);
}
select_query->setExpression(ASTSelectQuery::Expression::WITH, std::move(expression_list_ast));
}
auto projection_ast = getProjection().toAST(options);
auto & projection_expression_list_ast = projection_ast->as<ASTExpressionList &>();

View File

@ -140,6 +140,18 @@ public:
cte_name = std::move(cte_name_value);
}
/// Returns true if query node has RECURSIVE WITH, false otherwise
bool isRecursiveWith() const
{
return is_recursive_with;
}
/// Set query node RECURSIVE WITH value
void setIsRecursiveWith(bool is_recursive_with_value)
{
is_recursive_with = is_recursive_with_value;
}
/// Returns true if query node has DISTINCT, false otherwise
bool isDistinct() const
{
@ -618,6 +630,7 @@ protected:
private:
bool is_subquery = false;
bool is_cte = false;
bool is_recursive_with = false;
bool is_distinct = false;
bool is_limit_with_ties = false;
bool is_group_by_with_totals = false;

View File

@ -271,6 +271,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
current_query_tree->setIsSubquery(is_subquery);
current_query_tree->setIsCTE(!cte_name.empty());
current_query_tree->setCTEName(cte_name);
current_query_tree->setIsRecursiveWith(select_query_typed.recursive_with);
current_query_tree->setIsDistinct(select_query_typed.distinct);
current_query_tree->setIsLimitWithTies(select_query_typed.limit_with_ties);
current_query_tree->setIsGroupByWithTotals(select_query_typed.group_by_with_totals);
@ -287,8 +288,22 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
auto select_with_list = select_query_typed.with();
if (select_with_list)
{
current_query_tree->getWithNode() = buildExpressionList(select_with_list, current_context);
if (select_query_typed.recursive_with)
{
for (auto & with_node : current_query_tree->getWith().getNodes())
{
auto * with_union_node = with_node->as<UnionNode>();
if (!with_union_node)
continue;
with_union_node->setIsRecursiveCTE(true);
}
}
}
auto select_expression_list = select_query_typed.select();
if (select_expression_list)
current_query_tree->getProjectionNode() = buildExpressionList(select_expression_list, current_context);

View File

@ -165,7 +165,6 @@ private:
/** ClickHouse query tree pass manager.
*
* TODO: Support setting optimize_monotonous_functions_in_order_by.
* TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column).
*/
@ -259,8 +258,6 @@ void addQueryTreePasses(QueryTreePassManager & manager, bool only_analyze)
manager.addPass(std::make_unique<RewriteSumFunctionWithSumAndCountPass>());
manager.addPass(std::make_unique<CountDistinctPass>());
manager.addPass(std::make_unique<UniqToCountPass>());
manager.addPass(std::make_unique<RewriteAggregateFunctionWithIfPass>());
manager.addPass(std::make_unique<SumIfToCountIfPass>());
manager.addPass(std::make_unique<RewriteArrayExistsToHasPass>());
manager.addPass(std::make_unique<NormalizeCountVariantsPass>());
@ -277,9 +274,12 @@ void addQueryTreePasses(QueryTreePassManager & manager, bool only_analyze)
manager.addPass(std::make_unique<OptimizeGroupByFunctionKeysPass>());
manager.addPass(std::make_unique<OptimizeGroupByInjectiveFunctionsPass>());
/// The order here is important as we want to keep collapsing in order
manager.addPass(std::make_unique<MultiIfToIfPass>());
manager.addPass(std::make_unique<IfConstantConditionPass>());
manager.addPass(std::make_unique<IfChainToMultiIfPass>());
manager.addPass(std::make_unique<RewriteAggregateFunctionWithIfPass>());
manager.addPass(std::make_unique<SumIfToCountIfPass>());
manager.addPass(std::make_unique<ComparisonTupleEliminationPass>());

View File

@ -0,0 +1,21 @@
#include <Analyzer/RecursiveCTE.h>
#include <Storages/IStorage.h>
namespace DB
{
RecursiveCTETable::RecursiveCTETable(TemporaryTableHolderPtr holder_,
StoragePtr storage_,
NamesAndTypes columns_)
: holder(std::move(holder_))
, storage(std::move(storage_))
, columns(std::move(columns_))
{}
StorageID RecursiveCTETable::getStorageID() const
{
return storage->getStorageID();
}
}

View File

@ -0,0 +1,51 @@
#pragma once
#include <Core/NamesAndTypes.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/TableNode.h>
namespace DB
{
/** Recursive CTEs allow recursive evaluation of UNION subqueries.
*
* Overview:
* https://www.postgresql.org/docs/current/queries-with.html#QUERIES-WITH-RECURSIVE
*
* Current implementation algorithm:
*
* During query analysis, when we resolve a UNION node that is inside the WITH RECURSIVE section of the parent query, we:
* 1. Resolve the non-recursive subquery first.
* 2. Create a temporary table using the projection columns of the subquery resolved in step 1.
* 3. Create a temporary table expression node using the storage from step 2.
* 4. Create a resolution scope for the recursive subquery. In that scope we register the node from step 3 as an expression argument under the UNION node CTE name.
* 5. Resolve the recursive subquery.
* 6. If the resolved UNION node uses the temporary table expression storage from step 2, we update the UNION query with the recursive CTE table.
*
* During query planning, if the UNION node contains a recursive CTE table, we add ReadFromRecursiveCTEStep to the query plan. That step is responsible for the whole
* recursive CTE query execution.
*
* TODO: Improve locking in ReadFromRecursiveCTEStep.
* TODO: Improve query analysis if query contains aggregates, JOINS, GROUP BY, ORDER BY, LIMIT, OFFSET.
* TODO: Support SEARCH DEPTH FIRST BY, SEARCH BREADTH FIRST BY syntax.
* TODO: Support CYCLE syntax.
* TODO: Support UNION DISTINCT recursive CTE mode.
*/
class RecursiveCTETable
{
public:
RecursiveCTETable(TemporaryTableHolderPtr holder_,
StoragePtr storage_,
NamesAndTypes columns_);
StorageID getStorageID() const;
TemporaryTableHolderPtr holder;
StoragePtr storage;
NamesAndTypes columns;
};
}
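A conceptual, self-contained sketch (not the ClickHouse implementation) of the UNION ALL evaluation loop described in the header comment above: the non-recursive part seeds a working set, and the recursive part is re-run against the rows from the previous round until nothing new appears. The toy "query" n -> n + 1 for n < 5 is a hypothetical stand-in for WITH RECURSIVE t AS (SELECT 1 AS n UNION ALL SELECT n + 1 FROM t WHERE n < 5):

#include <vector>
#include <iostream>

int main()
{
    std::vector<int> result = {1};         // non-recursive part (step 1)
    std::vector<int> last_round = result;  // "temporary table" visible to the recursive part

    while (!last_round.empty())
    {
        std::vector<int> next_round;
        for (int n : last_round)           // recursive part reads the previous round (step 5)
            if (n < 5)
                next_round.push_back(n + 1);

        result.insert(result.end(), next_round.begin(), next_round.end());
        last_round = std::move(next_round);
    }

    for (int n : result)
        std::cout << n << ' ';             // 1 2 3 4 5
    std::cout << '\n';
    return 0;
}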

View File

@ -33,6 +33,14 @@ TableNode::TableNode(StoragePtr storage_, const ContextPtr & context)
{
}
void TableNode::updateStorage(StoragePtr storage_value, const ContextPtr & context)
{
storage = std::move(storage_value);
storage_id = storage->getStorageID();
storage_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context);
}
void TableNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
{
buffer << std::string(indent, ' ') << "TABLE id: " << format_state.getNodeId(this);

View File

@ -32,6 +32,11 @@ public:
/// Construct table node with storage, context
explicit TableNode(StoragePtr storage_, const ContextPtr & context);
/** Update table node storage.
* After this call storage, storage_id, storage_lock, storage_snapshot will be updated using new storage.
*/
void updateStorage(StoragePtr storage_value, const ContextPtr & context);
/// Get storage
const StoragePtr & getStorage() const
{

View File

@ -9,6 +9,7 @@
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTWithElement.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
@ -20,6 +21,8 @@
#include <DataTypes/getLeastSupertype.h>
#include <Storages/IStorage.h>
#include <Interpreters/Context.h>
#include <Analyzer/QueryNode.h>
@ -49,6 +52,9 @@ UnionNode::UnionNode(ContextMutablePtr context_, SelectUnionMode union_mode_)
NamesAndTypes UnionNode::computeProjectionColumns() const
{
if (recursive_cte_table)
return recursive_cte_table->columns;
std::vector<NamesAndTypes> projections;
NamesAndTypes query_node_projection;
@ -90,6 +96,9 @@ NamesAndTypes UnionNode::computeProjectionColumns() const
void UnionNode::removeUnusedProjectionColumns(const std::unordered_set<std::string> & used_projection_columns)
{
if (recursive_cte_table)
return;
auto projection_columns = computeProjectionColumns();
size_t projection_columns_size = projection_columns.size();
std::unordered_set<size_t> used_projection_column_indexes;
@ -113,6 +122,9 @@ void UnionNode::removeUnusedProjectionColumns(const std::unordered_set<std::stri
void UnionNode::removeUnusedProjectionColumns(const std::unordered_set<size_t> & used_projection_columns_indexes)
{
if (recursive_cte_table)
return;
auto & query_nodes = getQueries().getNodes();
for (auto & query_node : query_nodes)
{
@ -136,6 +148,12 @@ void UnionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
if (is_cte)
buffer << ", is_cte: " << is_cte;
if (is_recursive_cte)
buffer << ", is_recursive_cte: " << is_recursive_cte;
if (recursive_cte_table)
buffer << ", recursive_cte_table: " << recursive_cte_table->storage->getStorageID().getNameForLogs();
if (!cte_name.empty())
buffer << ", cte_name: " << cte_name;
@ -149,14 +167,28 @@ bool UnionNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const
{
const auto & rhs_typed = assert_cast<const UnionNode &>(rhs);
return is_subquery == rhs_typed.is_subquery && is_cte == rhs_typed.is_cte && cte_name == rhs_typed.cte_name &&
union_mode == rhs_typed.union_mode;
if (recursive_cte_table && rhs_typed.recursive_cte_table &&
recursive_cte_table->getStorageID() != rhs_typed.recursive_cte_table->getStorageID())
return false;
else if ((recursive_cte_table && !rhs_typed.recursive_cte_table) || (!recursive_cte_table && rhs_typed.recursive_cte_table))
return false;
return is_subquery == rhs_typed.is_subquery && is_cte == rhs_typed.is_cte && is_recursive_cte == rhs_typed.is_recursive_cte
&& cte_name == rhs_typed.cte_name && union_mode == rhs_typed.union_mode;
}
void UnionNode::updateTreeHashImpl(HashState & state, CompareOptions) const
{
state.update(is_subquery);
state.update(is_cte);
state.update(is_recursive_cte);
if (recursive_cte_table)
{
auto full_name = recursive_cte_table->getStorageID().getFullNameNotQuoted();
state.update(full_name.size());
state.update(full_name);
}
state.update(cte_name.size());
state.update(cte_name);
@ -170,6 +202,8 @@ QueryTreeNodePtr UnionNode::cloneImpl() const
result_union_node->is_subquery = is_subquery;
result_union_node->is_cte = is_cte;
result_union_node->is_recursive_cte = is_recursive_cte;
result_union_node->recursive_cte_table = recursive_cte_table;
result_union_node->cte_name = cte_name;
return result_union_node;
@ -183,14 +217,64 @@ ASTPtr UnionNode::toASTImpl(const ConvertToASTOptions & options) const
select_with_union_query->children.push_back(getQueriesNode()->toAST(options));
select_with_union_query->list_of_selects = select_with_union_query->children.back();
if (is_subquery)
ASTPtr result_query = std::move(select_with_union_query);
bool set_subquery_cte_name = true;
if (recursive_cte_table)
{
auto subquery = std::make_shared<ASTSubquery>(std::move(select_with_union_query));
subquery->cte_name = cte_name;
return subquery;
auto recursive_select_query = std::make_shared<ASTSelectQuery>();
recursive_select_query->recursive_with = true;
auto with_element_ast = std::make_shared<ASTWithElement>();
with_element_ast->name = cte_name;
with_element_ast->subquery = std::make_shared<ASTSubquery>(std::move(result_query));
with_element_ast->children.push_back(with_element_ast->subquery);
auto with_expression_list_ast = std::make_shared<ASTExpressionList>();
with_expression_list_ast->children.push_back(std::move(with_element_ast));
recursive_select_query->setExpression(ASTSelectQuery::Expression::WITH, std::move(with_expression_list_ast));
auto select_expression_list_ast = std::make_shared<ASTExpressionList>();
select_expression_list_ast->children.reserve(recursive_cte_table->columns.size());
for (const auto & recursive_cte_table_column : recursive_cte_table->columns)
select_expression_list_ast->children.push_back(std::make_shared<ASTIdentifier>(recursive_cte_table_column.name));
recursive_select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list_ast));
auto table_expression_ast = std::make_shared<ASTTableExpression>();
table_expression_ast->children.push_back(std::make_shared<ASTTableIdentifier>(cte_name));
table_expression_ast->database_and_table_name = table_expression_ast->children.back();
auto tables_in_select_query_element_ast = std::make_shared<ASTTablesInSelectQueryElement>();
tables_in_select_query_element_ast->children.push_back(std::move(table_expression_ast));
tables_in_select_query_element_ast->table_expression = tables_in_select_query_element_ast->children.back();
ASTPtr tables_in_select_query_ast = std::make_shared<ASTTablesInSelectQuery>();
tables_in_select_query_ast->children.push_back(std::move(tables_in_select_query_element_ast));
recursive_select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables_in_select_query_ast));
auto recursive_select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
auto recursive_select_with_union_query_list_of_selects = std::make_shared<ASTExpressionList>();
recursive_select_with_union_query_list_of_selects->children.push_back(std::move(recursive_select_query));
recursive_select_with_union_query->children.push_back(std::move(recursive_select_with_union_query_list_of_selects));
recursive_select_with_union_query->list_of_selects = recursive_select_with_union_query->children.back();
result_query = std::move(recursive_select_with_union_query);
set_subquery_cte_name = false;
}
return select_with_union_query;
if (is_subquery)
{
auto subquery = std::make_shared<ASTSubquery>(std::move(result_query));
if (set_subquery_cte_name)
subquery->cte_name = cte_name;
result_query = std::move(subquery);
}
return result_query;
}
}

View File

@ -9,6 +9,7 @@
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/ListNode.h>
#include <Analyzer/TableExpressionModifiers.h>
#include <Analyzer/RecursiveCTE.h>
#include <Interpreters/Context_fwd.h>
@ -84,6 +85,42 @@ public:
is_cte = is_cte_value;
}
/// Returns true if union node CTE is specified in WITH RECURSIVE, false otherwise
bool isRecursiveCTE() const
{
return is_recursive_cte;
}
/// Set union node is recursive CTE value
void setIsRecursiveCTE(bool is_recursive_cte_value)
{
is_recursive_cte = is_recursive_cte_value;
}
/// Returns true if union node has recursive CTE table, false otherwise
bool hasRecursiveCTETable() const
{
return recursive_cte_table.has_value();
}
/// Returns optional recursive CTE table
const std::optional<RecursiveCTETable> & getRecursiveCTETable() const
{
return recursive_cte_table;
}
/// Returns optional recursive CTE table
std::optional<RecursiveCTETable> & getRecursiveCTETable()
{
return recursive_cte_table;
}
/// Set union node recursive CTE table value
void setRecursiveCTETable(RecursiveCTETable recursive_cte_table_value)
{
recursive_cte_table.emplace(std::move(recursive_cte_table_value));
}
/// Get union node CTE name
const std::string & getCTEName() const
{
@ -154,6 +191,8 @@ protected:
private:
bool is_subquery = false;
bool is_cte = false;
bool is_recursive_cte = false;
std::optional<RecursiveCTETable> recursive_cte_table;
std::string cte_name;
ContextMutablePtr context;
SelectUnionMode union_mode;

View File

@ -5,6 +5,7 @@
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTFunction.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeArray.h>
@ -15,6 +16,8 @@
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionFactory.h>
#include <Storages/IStorage.h>
#include <Interpreters/Context.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
@ -61,6 +64,36 @@ bool isNodePartOfTree(const IQueryTreeNode * node, const IQueryTreeNode * root)
return false;
}
bool isStorageUsedInTree(const StoragePtr & storage, const IQueryTreeNode * root)
{
std::vector<const IQueryTreeNode *> nodes_to_process;
nodes_to_process.push_back(root);
while (!nodes_to_process.empty())
{
const auto * subtree_node = nodes_to_process.back();
nodes_to_process.pop_back();
const auto * table_node = subtree_node->as<TableNode>();
const auto * table_function_node = subtree_node->as<TableFunctionNode>();
if (table_node || table_function_node)
{
const auto & table_storage = table_node ? table_node->getStorage() : table_function_node->getStorage();
if (table_storage->getStorageID() == storage->getStorageID())
return true;
}
for (const auto & child : subtree_node->getChildren())
{
if (child)
nodes_to_process.push_back(child.get());
}
}
return false;
}
bool isNameOfInFunction(const std::string & function_name)
{
bool is_special_function_in = function_name == "in" ||
@ -808,26 +841,87 @@ QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node)
return source;
}
QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(QueryTreeNodePtr table_node, const ContextPtr & context)
/** There are no limits on the maximum size of the result for the subquery,
* since the result of the subquery is not the result of the entire query.
*/
void updateContextForSubqueryExecution(ContextMutablePtr & mutable_context)
{
/** The subquery in the IN / JOIN section does not have any restrictions on the maximum size of the result.
* Because the result of this query is not the result of the entire query.
* Constraints work instead
* max_rows_in_set, max_bytes_in_set, set_overflow_mode,
* max_rows_in_join, max_bytes_in_join, join_overflow_mode,
* which are checked separately (in the Set, Join objects).
*/
Settings subquery_settings = mutable_context->getSettings();
subquery_settings.max_result_rows = 0;
subquery_settings.max_result_bytes = 0;
/// The calculation of extremes does not make sense and is not necessary (if you do it, then the extremes of the subquery can be taken for whole query).
subquery_settings.extremes = false;
mutable_context->setSettings(subquery_settings);
}
QueryTreeNodePtr buildQueryToReadColumnsFromTableExpression(const NamesAndTypes & columns,
const QueryTreeNodePtr & table_expression,
ContextMutablePtr & context)
{
auto projection_columns = columns;
QueryTreeNodes subquery_projection_nodes;
subquery_projection_nodes.reserve(projection_columns.size());
for (const auto & column : projection_columns)
subquery_projection_nodes.push_back(std::make_shared<ColumnNode>(column, table_expression));
if (subquery_projection_nodes.empty())
{
auto constant_data_type = std::make_shared<DataTypeUInt64>();
subquery_projection_nodes.push_back(std::make_shared<ConstantNode>(1UL, constant_data_type));
projection_columns.push_back({"1", std::move(constant_data_type)});
}
updateContextForSubqueryExecution(context);
auto query_node = std::make_shared<QueryNode>(std::move(context));
query_node->getProjection().getNodes() = std::move(subquery_projection_nodes);
query_node->resolveProjectionColumns(projection_columns);
query_node->getJoinTree() = table_expression;
return query_node;
}
QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(const NamesAndTypes & columns,
const QueryTreeNodePtr & table_expression,
ContextMutablePtr & context)
{
auto result = buildQueryToReadColumnsFromTableExpression(columns, table_expression, context);
result->as<QueryNode &>().setIsSubquery(true);
return result;
}
QueryTreeNodePtr buildQueryToReadColumnsFromTableExpression(const NamesAndTypes & columns,
const QueryTreeNodePtr & table_expression,
const ContextPtr & context)
{
auto context_copy = Context::createCopy(context);
return buildQueryToReadColumnsFromTableExpression(columns, table_expression, context_copy);
}
QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(const NamesAndTypes & columns,
const QueryTreeNodePtr & table_expression,
const ContextPtr & context)
{
auto context_copy = Context::createCopy(context);
return buildSubqueryToReadColumnsFromTableExpression(columns, table_expression, context_copy);
}
QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(const QueryTreeNodePtr & table_node, const ContextPtr & context)
{
const auto & storage_snapshot = table_node->as<TableNode>()->getStorageSnapshot();
auto columns_to_select = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::Ordinary));
size_t columns_to_select_size = columns_to_select.size();
auto column_nodes_to_select = std::make_shared<ListNode>();
column_nodes_to_select->getNodes().reserve(columns_to_select_size);
NamesAndTypes projection_columns;
projection_columns.reserve(columns_to_select_size);
for (auto & column : columns_to_select)
{
column_nodes_to_select->getNodes().emplace_back(std::make_shared<ColumnNode>(column, table_node));
projection_columns.emplace_back(column.name, column.type);
}
auto subquery_for_table = std::make_shared<QueryNode>(Context::createCopy(context));
subquery_for_table->setIsSubquery(true);
subquery_for_table->getProjectionNode() = std::move(column_nodes_to_select);
subquery_for_table->getJoinTree() = std::move(table_node);
subquery_for_table->resolveProjectionColumns(std::move(projection_columns));
return subquery_for_table;
auto columns_to_select_list = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::Ordinary));
NamesAndTypes columns_to_select(columns_to_select_list.begin(), columns_to_select_list.end());
return buildSubqueryToReadColumnsFromTableExpression(columns_to_select, table_node, context);
}
}

View File

@ -1,9 +1,13 @@
#pragma once
#include <Analyzer/IQueryTreeNode.h>
#include <Core/NamesAndTypes.h>
#include <Storages/IStorage_fwd.h>
#include <Interpreters/Context_fwd.h>
#include <Analyzer/IQueryTreeNode.h>
namespace DB
{
@ -12,6 +16,9 @@ class FunctionNode;
/// Returns true if node part of root tree, false otherwise
bool isNodePartOfTree(const IQueryTreeNode * node, const IQueryTreeNode * root);
/// Returns true if storage is used in tree, false otherwise
bool isStorageUsedInTree(const StoragePtr & storage, const IQueryTreeNode * root);
/// Returns true if function name is name of IN function or its variations, false otherwise
bool isNameOfInFunction(const std::string & function_name);
@ -108,7 +115,41 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty
/// Checks that node has only one source and returns it
QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node);
/// Build subquery which we execute for `IN table` function.
QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(QueryTreeNodePtr table_node, const ContextPtr & context);
/// Update mutable context for subquery execution
void updateContextForSubqueryExecution(ContextMutablePtr & mutable_context);
/** Build query to read specified columns from table expression.
* Specified mutable context will be used as query context.
*/
QueryTreeNodePtr buildQueryToReadColumnsFromTableExpression(const NamesAndTypes & columns,
const QueryTreeNodePtr & table_expression,
ContextMutablePtr & context);
/** Build subquery to read specified columns from table expression.
* Specified mutable context will be used as query context.
*/
QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(const NamesAndTypes & columns,
const QueryTreeNodePtr & table_expression,
ContextMutablePtr & context);
/** Build query to read specified columns from table expression.
* Specified context will be copied and used as query context.
*/
QueryTreeNodePtr buildQueryToReadColumnsFromTableExpression(const NamesAndTypes & columns,
const QueryTreeNodePtr & table_expression,
const ContextPtr & context);
/** Build subquery to read specified columns from table expression.
* Specified context will be copied and used as query context.
*/
QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(const NamesAndTypes & columns,
const QueryTreeNodePtr & table_expression,
const ContextPtr & context);
/** Build subquery to read all columns from table expression.
* Specified context will be copied and used as query context.
*/
QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(const QueryTreeNodePtr & table_node, const ContextPtr & context);
}

View File

@ -221,7 +221,8 @@ std::unique_ptr<WriteBuffer> BackupWriterAzureBlobStorage::writeFile(const Strin
key,
DBMS_DEFAULT_BUFFER_SIZE,
write_settings,
settings);
settings,
threadPoolCallbackRunnerUnsafe<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
}
void BackupWriterAzureBlobStorage::removeFile(const String & file_name)

View File

@ -109,7 +109,7 @@ RestorerFromBackup::~RestorerFromBackup()
if (getNumFutures() > 0)
{
LOG_INFO(log, "Waiting for {} tasks to finish", getNumFutures());
waitFutures();
waitFutures(/* throw_if_error= */ false);
}
}
@ -161,7 +161,7 @@ void RestorerFromBackup::run(Mode mode)
setStage(Stage::COMPLETED);
}
void RestorerFromBackup::waitFutures()
void RestorerFromBackup::waitFutures(bool throw_if_error)
{
std::exception_ptr error;
@ -176,11 +176,7 @@ void RestorerFromBackup::waitFutures()
if (futures_to_wait.empty())
break;
/// Wait for all tasks.
for (auto & future : futures_to_wait)
future.wait();
/// Check if there is an exception.
/// Wait for all tasks to finish.
for (auto & future : futures_to_wait)
{
try
@ -197,7 +193,12 @@ void RestorerFromBackup::waitFutures()
}
if (error)
std::rethrow_exception(error);
{
if (throw_if_error)
std::rethrow_exception(error);
else
tryLogException(error, log);
}
}
size_t RestorerFromBackup::getNumFutures() const
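A standalone sketch of the waiting pattern above, with a hypothetical `wait_futures` helper built on std::future: every task is awaited, the first exception is remembered, and it is either rethrown or merely reported depending on `throw_if_error` (a destructor, for instance, must not throw):

#include <future>
#include <vector>
#include <exception>
#include <stdexcept>
#include <iostream>

void wait_futures(std::vector<std::future<void>> & futures, bool throw_if_error)
{
    std::exception_ptr error;
    for (auto & future : futures)
    {
        try
        {
            future.get();   // waits, then rethrows the task's exception if it had one
        }
        catch (...)
        {
            if (!error)
                error = std::current_exception();
        }
    }

    if (!error)
        return;

    if (throw_if_error)
        std::rethrow_exception(error);

    try { std::rethrow_exception(error); }
    catch (const std::exception & e) { std::cerr << "task failed: " << e.what() << '\n'; }
    catch (...) { std::cerr << "task failed with unknown exception\n"; }
}

int main()
{
    std::vector<std::future<void>> futures;
    futures.push_back(std::async(std::launch::async, [] { throw std::runtime_error("boom"); }));
    futures.push_back(std::async(std::launch::async, [] {}));
    wait_futures(futures, /* throw_if_error = */ false);   // reports instead of throwing
    return 0;
}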

View File

@ -130,7 +130,7 @@ private:
/// Waits until all tasks are processed (including the tasks scheduled while we're waiting).
/// Throws an exception if any of the tasks throws an exception.
void waitFutures();
void waitFutures(bool throw_if_error = true);
/// Throws an exception if the RESTORE query was cancelled.
void checkIsQueryCancelled() const;

View File

@ -2955,7 +2955,8 @@ void ClientBase::init(int argc, char ** argv)
/// Common options for clickhouse-client and clickhouse-local.
options_description.main_description->add_options()
("help", "produce help message")
("help", "print usage summary, combine with --verbose to display all options")
("verbose", "print query and other debugging info")
("version,V", "print version information and exit")
("version-clean", "print version in machine-readable format and exit")
@ -2979,7 +2980,6 @@ void ClientBase::init(int argc, char ** argv)
("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)")
("echo", "in batch mode, print query before execution")
("verbose", "print query and other debugging info")
("log-level", po::value<std::string>(), "log level")
("server_logs_file", po::value<std::string>(), "put server logs into specified file")
@ -3008,6 +3008,8 @@ void ClientBase::init(int argc, char ** argv)
addOptions(options_description);
OptionsDescription options_description_non_verbose = options_description;
auto getter = [](const auto & op)
{
String op_long_name = op->long_name();
@ -3042,11 +3044,17 @@ void ClientBase::init(int argc, char ** argv)
exit(0); // NOLINT(concurrency-mt-unsafe)
}
if (options.count("verbose"))
config().setBool("verbose", true);
/// Output of help message.
if (options.count("help")
|| (options.count("host") && options["host"].as<std::string>() == "elp")) /// If user writes -help instead of --help.
{
printHelpMessage(options_description);
if (config().getBool("verbose", false))
printHelpMessage(options_description, true);
else
printHelpMessage(options_description_non_verbose, false);
exit(0); // NOLINT(concurrency-mt-unsafe)
}
@ -3113,8 +3121,6 @@ void ClientBase::init(int argc, char ** argv)
config().setBool("highlight", options["highlight"].as<bool>());
if (options.count("history_file"))
config().setString("history_file", options["history_file"].as<std::string>());
if (options.count("verbose"))
config().setBool("verbose", true);
if (options.count("interactive"))
config().setBool("interactive", true);
if (options.count("pager"))
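A minimal boost::program_options sketch of the same idea: keep a full and a trimmed description, and print the full one only when --help is combined with --verbose. Option names other than --help and --verbose are hypothetical:

#include <boost/program_options.hpp>
#include <iostream>

namespace po = boost::program_options;

int main(int argc, char ** argv)
{
    po::options_description all("Options");
    all.add_options()
        ("help", "print usage summary, combine with --verbose to display all options")
        ("verbose", "print query and other debugging info")
        ("obscure-option", "only shown in verbose help");   // hypothetical option

    po::options_description brief("Options");
    brief.add_options()
        ("help", "print usage summary")
        ("verbose", "print debugging info");

    po::variables_map options;
    po::store(po::parse_command_line(argc, argv, all), options);

    if (options.count("help"))
        std::cout << (options.count("verbose") ? all : brief) << "\n";
    return 0;
}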

View File

@ -121,7 +121,7 @@ protected:
};
virtual void updateLoggerLevel(const String &) {}
virtual void printHelpMessage(const OptionsDescription & options_description) = 0;
virtual void printHelpMessage(const OptionsDescription & options_description, bool verbose) = 0;
virtual void addOptions(OptionsDescription & options_description) = 0;
virtual void processOptions(const OptionsDescription & options_description,
const CommandLineOptions & options,

View File

@ -125,7 +125,7 @@ void highlight(const String & query, std::vector<replxx::Replxx::Color> & colors
const char * begin = query.data();
const char * end = begin + query.size();
Tokens tokens(begin, end, 1000, true);
Tokens tokens(begin, end, 10000, true);
IParser::Pos token_iterator(tokens, static_cast<uint32_t>(1000), static_cast<uint32_t>(10000));
Expected expected;
expected.enable_highlighting = true;

View File

@ -28,28 +28,6 @@ namespace ErrorCodes
extern const int USER_SESSION_LIMIT_EXCEEDED;
}
Suggest::Suggest()
{
/// Keywords may be not up to date with ClickHouse parser.
addWords({"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON",
"CLUSTER", "DEFAULT", "MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE",
"SETTINGS", "ATTACH", "DETACH", "DROP", "RENAME", "TO", "ALTER", "ADD",
"MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT", "PRIMARY", "KEY",
"CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO",
"OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN",
"THEN", "ELSE", "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE",
"FINAL", "DEDUPLICATE", "INSERT", "VALUES", "SELECT", "DISTINCT", "SAMPLE", "ARRAY",
"JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", "LEFT", "RIGHT",
"FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY",
"WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND",
"OR", "ASC", "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST",
"BETWEEN", "TRUNCATE", "USER", "ROLE", "PROFILE", "QUOTA", "POLICY", "ROW",
"GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", "IDENTIFIED", "HOST",
"NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", "INTERVAL",
"LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "CLEANUP", "APPEND",
"IGNORE NULLS", "RESPECT NULLS", "OVER", "PASTE", "WINDOW", "QUALIFY"});
}
static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggestion)
{
/// NOTE: Once you will update the completion list,
@ -82,6 +60,7 @@ static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggesti
add_column("name", "data_type_families", false, {});
add_column("name", "merge_tree_settings", false, {});
add_column("name", "settings", false, {});
add_column("keyword", "keywords", false, {});
if (!basic_suggestion)
{

View File

@ -17,7 +17,7 @@ namespace DB
class Suggest : public LineReader::Suggest, boost::noncopyable
{
public:
Suggest();
Suggest() = default;
~Suggest()
{

View File

@ -1048,20 +1048,16 @@ public:
template <typename Date>
requires std::is_same_v<Date, DayNum> || std::is_same_v<Date, ExtendedDayNum>
auto toStartOfWeekInterval(Date d, UInt64 weeks, UInt8 week_mode) const
auto toStartOfWeekInterval(Date d, UInt64 weeks) const
{
if (weeks == 1)
return toFirstDayNumOfWeek(d, week_mode);
bool monday_first_mode = week_mode & static_cast<UInt8>(WeekModeFlag::MONDAY_FIRST);
// January 1st 1970 was Thursday so we need this 4-days offset to make weeks start on Monday, or
// 3 days to start on Sunday.
auto offset = monday_first_mode ? 4 : 3;
return toFirstDayNumOfWeek(d);
UInt64 days = weeks * 7;
// January 1st 1970 was Thursday so we need this 4-days offset to make weeks start on Monday.
if constexpr (std::is_same_v<Date, DayNum>)
return DayNum(offset + (d - offset) / days * days);
return DayNum(4 + (d - 4) / days * days);
else
return ExtendedDayNum(static_cast<Int32>(offset + (d - offset) / days * days));
return ExtendedDayNum(static_cast<Int32>(4 + (d - 4) / days * days));
}
template <typename Date>
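A standalone illustration of the arithmetic in the comment above (assuming non-negative day numbers): day 0 = 1970-01-01 was a Thursday, so day 4 = 1970-01-05 was a Monday, and rounding `d - 4` down to a multiple of `weeks * 7` before adding the offset back yields the Monday that starts the interval containing day `d`:

#include <cstdint>
#include <iostream>

int64_t toStartOfWeekIntervalDays(int64_t d, int64_t weeks)
{
    const int64_t days = weeks * 7;
    return 4 + (d - 4) / days * days;
}

int main()
{
    // 19723 = 2024-01-01 (Monday); 19729 = 2024-01-07 (Sunday).
    // Both map to 19716 = 2023-12-25, the Monday starting their two-week interval.
    std::cout << toStartOfWeekIntervalDays(19723, 2) << '\n';  // 19716
    std::cout << toStartOfWeekIntervalDays(19729, 2) << '\n';  // 19716
    return 0;
}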

View File

@ -598,6 +598,7 @@
M(717, EXPERIMENTAL_FEATURE_ERROR) \
M(718, TOO_SLOW_PARSING) \
M(719, QUERY_CACHE_USED_WITH_SYSTEM_TABLE) \
M(720, USER_EXPIRED) \
\
M(900, DISTRIBUTED_CACHE_ERROR) \
M(901, CANNOT_USE_DISTRIBUTED_CACHE) \

View File

@ -391,6 +391,7 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b
{
WriteBufferFromOwnString stream;
std::string_view message_format_string;
std::vector<std::string> message_format_string_args;
try
{
@ -402,6 +403,7 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b
<< (with_extra_info ? getExtraExceptionInfo(e) : "")
<< " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
message_format_string = e.tryGetMessageFormatString();
message_format_string_args = e.getMessageFormatStringArgs();
}
catch (const Poco::Exception & e)
{
@ -462,7 +464,7 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b
catch (...) {} // NOLINT(bugprone-empty-catch)
}
return PreformattedMessage{stream.str(), message_format_string};
return PreformattedMessage{stream.str(), message_format_string, message_format_string_args};
}
@ -581,7 +583,7 @@ PreformattedMessage getExceptionMessageAndPattern(const Exception & e, bool with
}
catch (...) {} // NOLINT(bugprone-empty-catch)
return PreformattedMessage{stream.str(), e.tryGetMessageFormatString()};
return PreformattedMessage{stream.str(), e.tryGetMessageFormatString(), e.getMessageFormatStringArgs()};
}
std::string getExceptionMessage(std::exception_ptr e, bool with_stacktrace)

View File

@ -13,6 +13,7 @@
#include <memory>
#include <vector>
#include <fmt/core.h>
#include <fmt/format.h>
#include <Poco/Exception.h>
@ -59,6 +60,7 @@ public:
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
message_format_string = msg.format_string;
message_format_string_args = msg.format_string_args;
}
Exception(PreformattedMessage && msg, int code): Exception(std::move(msg.text), code)
@ -67,6 +69,7 @@ public:
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
message_format_string = msg.format_string;
message_format_string_args = msg.format_string_args;
}
/// Collect call stacks of all previous jobs' schedulings leading to this thread job's execution
@ -107,12 +110,7 @@ public:
// Format message with fmt::format, like the logging functions.
template <typename... Args>
Exception(int code, FormatStringHelper<Args...> fmt, Args &&... args)
: Exception(fmt::format(fmt.fmt_str, std::forward<Args>(args)...), code)
{
capture_thread_frame_pointers = thread_frame_pointers;
message_format_string = fmt.message_format_string;
}
Exception(int code, FormatStringHelper<Args...> fmt, Args &&... args) : Exception(fmt.format(std::forward<Args>(args)...), code) {}
struct CreateFromPocoTag {};
struct CreateFromSTDTag {};
@ -152,6 +150,8 @@ public:
std::string_view tryGetMessageFormatString() const { return message_format_string; }
std::vector<std::string> getMessageFormatStringArgs() const { return message_format_string_args; }
private:
#ifndef STD_EXCEPTION_HAS_STACK_TRACE
StackTrace trace;
@ -162,6 +162,7 @@ private:
protected:
std::string_view message_format_string;
std::vector<std::string> message_format_string_args;
/// Local copy of static per-thread thread_frame_pointers, should be mutable to be unpoisoned on printout
mutable std::vector<StackTrace::FramePointers> capture_thread_frame_pointers;
};
@ -193,26 +194,29 @@ public:
// Format message with fmt::format, like the logging functions.
template <typename... Args>
ErrnoException(int code, FormatStringHelper<Args...> fmt, Args &&... args)
: Exception(fmt::format(fmt.fmt_str, std::forward<Args>(args)...), code), saved_errno(errno)
: Exception(fmt.format(std::forward<Args>(args)...), code), saved_errno(errno)
{
addMessage(", {}", errnoToString(saved_errno));
}
template <typename... Args>
ErrnoException(int code, int with_errno, FormatStringHelper<Args...> fmt, Args &&... args)
: Exception(fmt.format(std::forward<Args>(args)...), code), saved_errno(with_errno)
{
capture_thread_frame_pointers = thread_frame_pointers;
message_format_string = fmt.message_format_string;
addMessage(", {}", errnoToString(saved_errno));
}
template <typename... Args>
[[noreturn]] static void throwWithErrno(int code, int with_errno, FormatStringHelper<Args...> fmt, Args &&... args)
{
auto e = ErrnoException(fmt::format(fmt.fmt_str, std::forward<Args>(args)...), code, with_errno);
e.message_format_string = fmt.message_format_string;
auto e = ErrnoException(code, with_errno, std::move(fmt), std::forward<Args>(args)...);
throw e; /// NOLINT
}
template <typename... Args>
[[noreturn]] static void throwFromPath(int code, const std::string & path, FormatStringHelper<Args...> fmt, Args &&... args)
{
auto e = ErrnoException(fmt::format(fmt.fmt_str, std::forward<Args>(args)...), code, errno);
e.message_format_string = fmt.message_format_string;
auto e = ErrnoException(code, errno, std::move(fmt), std::forward<Args>(args)...);
e.path = path;
throw e; /// NOLINT
}
@ -221,8 +225,7 @@ public:
[[noreturn]] static void
throwFromPathWithErrno(int code, const std::string & path, int with_errno, FormatStringHelper<Args...> fmt, Args &&... args)
{
auto e = ErrnoException(fmt::format(fmt.fmt_str, std::forward<Args>(args)...), code, with_errno);
e.message_format_string = fmt.message_format_string;
auto e = ErrnoException(code, with_errno, std::move(fmt), std::forward<Args>(args)...);
e.path = path;
throw e; /// NOLINT
}
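
The ErrnoException helpers above now delegate to a single constructor instead of repeating fmt::format and the manual message_format_string assignment. A minimal standalone sketch of the same delegating pattern, with a strerror-based suffix standing in for errnoToString (names and behaviour are illustrative, not the ClickHouse classes):

#include <cerrno>
#include <cstring>
#include <stdexcept>
#include <string>
#include <utility>

#include <fmt/core.h>

// Illustrative only: one constructor owns the formatting and the errno
// suffix; the static helpers just forward to it, so the logic is not
// duplicated per helper.
class ErrnoError : public std::runtime_error
{
public:
    template <typename... Args>
    ErrnoError(int err, fmt::format_string<Args...> fmt_str, Args &&... args)
        : std::runtime_error(
              fmt::format(fmt_str, std::forward<Args>(args)...) + ", " + std::strerror(err))
        , saved_errno(err)
    {
    }

    template <typename... Args>
    [[noreturn]] static void throwFromPath(const std::string & path_, fmt::format_string<Args...> fmt_str, Args &&... args)
    {
        // Delegate the formatting and errno capture to the constructor.
        ErrnoError e(errno, fmt_str, std::forward<Args>(args)...);
        e.path = path_;
        throw e;
    }

    int saved_errno;
    std::string path;
};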

View File

@ -39,6 +39,7 @@ static struct InitFiu
REGULAR(replicated_merge_tree_commit_zk_fail_when_recovering_from_hw_fault) \
REGULAR(use_delayed_remote_source) \
REGULAR(cluster_discovery_faults) \
REGULAR(replicated_sends_failpoint) \
ONCE(smt_commit_merge_mutate_zk_fail_after_op) \
ONCE(smt_commit_merge_mutate_zk_fail_before_op) \
ONCE(smt_commit_write_zk_fail_after_op) \

View File

@ -2,8 +2,11 @@
#include <base/defines.h>
#include <base/types.h>
#include <fmt/args.h>
#include <fmt/core.h>
#include <fmt/format.h>
#include <mutex>
#include <type_traits>
#include <unordered_map>
#include <Poco/Logger.h>
#include <Poco/Message.h>
@ -14,6 +17,10 @@ struct PreformattedMessage;
consteval void formatStringCheckArgsNumImpl(std::string_view str, size_t nargs);
template <typename T> constexpr std::string_view tryGetStaticFormatString(T && x);
[[maybe_unused]] inline void tryGetFormattedArgs(std::vector<std::string>&) {};
template <typename T, typename... Ts> [[maybe_unused]] inline void tryGetFormattedArgs(std::vector<std::string>&, T &&, Ts && ...);
template <typename... Args> inline std::string tryGetArgsAndFormat(std::vector<std::string>&, fmt::format_string<Args...>, Args && ...);
/// Extract format string from a string literal and constructs consteval fmt::format_string
template <typename... Args>
struct FormatStringHelperImpl
@ -39,6 +46,7 @@ struct PreformattedMessage
{
std::string text;
std::string_view format_string;
std::vector<std::string> format_string_args;
template <typename... Args>
static PreformattedMessage create(FormatStringHelper<Args...> fmt, Args &&... args);
@ -47,22 +55,26 @@ struct PreformattedMessage
operator std::string () && { return std::move(text); } /// NOLINT
operator fmt::format_string<> () const { UNREACHABLE(); } /// NOLINT
void apply(std::string & out_text, std::string_view & out_format_string) const &
void apply(std::string & out_text, std::string_view & out_format_string, std::vector<std::string> & out_format_string_args) const &
{
out_text = text;
out_format_string = format_string;
out_format_string_args = format_string_args;
}
void apply(std::string & out_text, std::string_view & out_format_string) &&
void apply(std::string & out_text, std::string_view & out_format_string, std::vector<std::string> & out_format_string_args) &&
{
out_text = std::move(text);
out_format_string = format_string;
out_format_string_args = std::move(format_string_args);
}
};
template <typename... Args>
PreformattedMessage FormatStringHelperImpl<Args...>::format(Args && ...args) const
{
return PreformattedMessage{fmt::format(fmt_str, std::forward<Args>(args)...), message_format_string};
std::vector<std::string> out_format_string_args;
std::string msg_text = tryGetArgsAndFormat(out_format_string_args, fmt_str, std::forward<Args>(args)...);
return PreformattedMessage{msg_text, message_format_string, out_format_string_args};
}
template <typename... Args>
@ -113,12 +125,23 @@ template <typename T> constexpr std::string_view tryGetStaticFormatString(T && x
}
}
template <typename T, typename... Ts> void tryGetFormattedArgs(std::vector<std::string>& out, T && x, Ts && ...rest)
{
if constexpr (std::is_base_of_v<fmt::detail::view, std::decay_t<T>>)
out.push_back(fmt::format("{}", std::remove_reference_t<T>(x)));
else
out.push_back(fmt::format("{}", std::forward<T>(x)));
tryGetFormattedArgs(out, std::forward<Ts>(rest)...);
}
/// Constexpr ifs are not like ifdefs, and compiler still checks that unneeded code can be compiled
/// This template is useful to avoid compilation failures when condition of some "constexpr if" is false
template<bool enable> struct ConstexprIfsAreNotIfdefs
{
template <typename T> constexpr static std::string_view getStaticFormatString(T &&) { return {}; }
template <typename T> static PreformattedMessage getPreformatted(T &&) { return {}; }
template <typename... Args> static std::string getArgsAndFormat(std::vector<std::string>&, fmt::format_string<Args...>, Args &&...) { return {}; }
};
template<> struct ConstexprIfsAreNotIfdefs<true>
@ -133,8 +156,19 @@ template<> struct ConstexprIfsAreNotIfdefs<true>
}
template <typename T> static T && getPreformatted(T && x) { return std::forward<T>(x); }
template <typename... Args> static std::string getArgsAndFormat(std::vector<std::string>& out, fmt::format_string<Args...> fmt_str, Args && ...args)
{
return tryGetArgsAndFormat(out, std::move(fmt_str), std::forward<Args>(args)...);
}
};
template <typename... Args> inline std::string tryGetArgsAndFormat(std::vector<std::string>& out, fmt::format_string<Args...> fmt_str, Args && ...args)
{
tryGetFormattedArgs(out, args...);
return fmt::format(fmt_str, std::forward<Args>(args)...);
}
template <typename... Ts> constexpr size_t numArgs(Ts &&...) { return sizeof...(Ts); }
template <typename T, typename... Ts> constexpr auto firstArg(T && x, Ts &&...) { return std::forward<T>(x); }
/// For implicit conversion of fmt::basic_runtime<> to char* for std::string ctor
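
tryGetArgsAndFormat above formats the message once and, as a side effect, stringifies every argument so the values can travel with the exception alongside the static format string. A simplified standalone sketch of that capture (it omits the fmt::detail::view special case and uses hypothetical names):

#include <string>
#include <utility>
#include <vector>

#include <fmt/core.h>

// Base case: no arguments left to capture.
inline void captureArgs(std::vector<std::string> &) {}

// Stringify each argument with fmt ("{}") and recurse on the rest.
template <typename T, typename... Ts>
void captureArgs(std::vector<std::string> & out, T && x, Ts &&... rest)
{
    out.push_back(fmt::format("{}", x));
    captureArgs(out, std::forward<Ts>(rest)...);
}

// Format the message and return the per-argument strings alongside it.
template <typename... Args>
std::string formatAndCapture(std::vector<std::string> & out, fmt::format_string<Args...> fmt_str, Args &&... args)
{
    captureArgs(out, args...);  // capture before forwarding, so nothing is moved-from
    return fmt::format(fmt_str, std::forward<Args>(args)...);
}

// Usage: args == {"query_log", "5"}, text == "Table query_log has 5 parts".
//     std::vector<std::string> args;
//     std::string text = formatAndCapture(args, "Table {} has {} parts", "query_log", 5);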

View File

@ -14,10 +14,7 @@ public:
, re_gen(key_template)
{
}
DB::ObjectStorageKey generate(const String &) const override
{
return DB::ObjectStorageKey::createAsAbsolute(re_gen.generate());
}
DB::ObjectStorageKey generate(const String &, bool) const override { return DB::ObjectStorageKey::createAsAbsolute(re_gen.generate()); }
private:
String key_template;
@ -32,7 +29,7 @@ public:
: key_prefix(std::move(key_prefix_))
{}
DB::ObjectStorageKey generate(const String &) const override
DB::ObjectStorageKey generate(const String &, bool) const override
{
/// Path to store the new S3 object.
@ -63,7 +60,7 @@ public:
: key_prefix(std::move(key_prefix_))
{}
DB::ObjectStorageKey generate(const String & path) const override
DB::ObjectStorageKey generate(const String & path, bool) const override
{
return DB::ObjectStorageKey::createAsRelative(key_prefix, path);
}

View File

@ -1,7 +1,7 @@
#pragma once
#include "ObjectStorageKey.h"
#include <memory>
#include "ObjectStorageKey.h"
namespace DB
{
@ -9,8 +9,9 @@ namespace DB
class IObjectStorageKeysGenerator
{
public:
virtual ObjectStorageKey generate(const String & path) const = 0;
virtual ~IObjectStorageKeysGenerator() = default;
virtual ObjectStorageKey generate(const String & path, bool is_directory) const = 0;
};
using ObjectStorageKeysGeneratorPtr = std::shared_ptr<IObjectStorageKeysGenerator>;
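
generate() now receives an is_directory flag in addition to the path. A minimal sketch of a generator written against the new two-argument signature; the prefix and trailing-slash handling are assumptions for illustration, not the real generators shown earlier:

#include <string>

// Hypothetical generator against the new two-argument interface; the
// directory suffix handling is illustrative, not the real implementation.
class PrefixedKeyGenerator
{
public:
    explicit PrefixedKeyGenerator(std::string key_prefix_) : key_prefix(std::move(key_prefix_)) {}

    std::string generate(const std::string & path, bool is_directory) const
    {
        std::string key = key_prefix + "/" + path;
        if (is_directory && !key.ends_with('/'))
            key += '/';  // callers can then rely on directory keys ending with a slash
        return key;
    }

private:
    std::string key_prefix;
};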

Some files were not shown because too many files have changed in this diff.